예제 #1
0
def get_timeout(url):
    """Return a urllib3 ``Timeout`` tuned for the host appearing in *url*.

    Known-slow or flaky hosts get looser limits; every other URL falls
    back to the default of connect=15, read=11, total=40 seconds.
    """
    slow_hosts = ("wiki.ros.org", "abyz.me.uk")
    dead_hosts = ("svn.eby-sarna.com", "gitorious.org")
    w3_hosts = ("w3.org", "pygal.org")

    if any(host in url for host in slow_hosts):
        return Timeout(connect=15, read=20, total=45)
    if any(host in url for host in dead_hosts):  # pragma: no cover
        return Timeout(connect=30, read=30, total=60)
    if any(host in url for host in w3_hosts):
        return Timeout(connect=30, read=30, total=45)
    # galaxyproject.org is extremely slow; only allow it outside CI.
    if not _CI and "galaxyproject.org" in url:
        return Timeout(connect=100, read=100, total=200)

    return Timeout(connect=15, read=11, total=40)
예제 #2
0
    def _test_head_302_bypasses_cache(self):
        # A HEAD request is answered before the cache layer runs, so
        # neither the first nor a repeat call carries `from_cache`.
        from pypidb._cache import get_file_cache_session

        target = "http://gpodder.org/"

        session = get_file_cache_session("web")
        response = session.head(target, timeout=Timeout(15))
        self.assertEqual(response.status_code, 302)
        self.assertEqual(response.url, target)
        self.assertEqual(response.headers["location"], "https://gpodder.github.io")
        self.assertFalse(response.content)
        self.assertFalse(hasattr(response, "from_cache"))

        # Second HEAD: still no cache involvement.
        response = session.head(target, timeout=Timeout(15))
        self.assertFalse(hasattr(response, "from_cache"))
예제 #3
0
    def _test_fetch_ply_requests_session(self):
        # A plain requests.Session (no cache layer) fetches the ply page
        # without any URL rewriting.
        target = "https://www.dabeaz.com/ply/"

        session = requests.Session()
        response = session.get(target, timeout=Timeout(5))
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.url, target)
예제 #4
0
    def test_head_requests_get(self):
        # Bare requests.head surfaces the 302 redirect without following it.
        target = "http://gpodder.org/"

        response = requests.head(target, timeout=Timeout(15))
        self.assertEqual(response.status_code, 302)
        self.assertEqual(response.url, target)
        self.assertEqual(response.headers["location"], "https://gpodder.github.io")
        self.assertFalse(response.content)
예제 #5
0
    def _test_timeout_moksha(self):
        # mokshaproject.net is a parked domain; the cached session is
        # expected to time out while connecting.
        from pypidb._cache import get_file_cache_session

        target = "https://mokshaproject.net"
        session = get_file_cache_session("web")
        with self.assertRaises(requests.exceptions.ConnectTimeout):
            session.get(target, timeout=Timeout(5))
예제 #6
0
    def test_fetch_ply_https(self):
        # The cached web session downgrades this https URL to plain http.
        from pypidb._cache import get_file_cache_session

        secure_url = "https://www.dabeaz.com/ply/"

        session = get_file_cache_session("web")
        response = session.get(secure_url, timeout=Timeout(5))
        self.assertEqual(response.status_code, 200)
        self.assertNotEqual(response.url, secure_url)
        self.assertEqual(response.url, secure_url.replace("https://", "http://"))
예제 #7
0
    def test_vlc(self):
        # The VLC wiki page is reachable, not rewritten, and identifies
        # itself as a MediaWiki instance linking to the videolan git host.
        from pypidb._cache import get_file_cache_session

        target = "https://wiki.videolan.org/PythonBinding"

        session = get_file_cache_session("web")
        response = session.get(target, timeout=Timeout(15))
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.url, target)
        self.assertTrue(response.content)
        self.assertIn(b'<meta name="generator" content="MediaWiki', response.content)
        self.assertIn(b"https://git.videolan.org/", response.content)
예제 #8
0
def get_with_retries(url, params=None):
    """GET *url* and return its body with surrounding quote/newline bytes
    stripped and literal ``\\n`` sequences expanded into real newlines.

    Retries up to 10 times with backoff on read errors and on
    500/502/503/504 responses, for both http and https URLs.

    :param url: URL to fetch.
    :param params: optional query parameters passed through to requests.
    :returns: the processed response body as bytes.
    """
    s = requests.Session()

    # Generous limits: 100s connect, 100s read, 100s overall.
    timeout = Timeout(100, 100, 100)

    retries = Retry(total=10,
                    read=10,
                    backoff_factor=0.1,
                    status_forcelist=[500, 502, 503, 504])

    # BUG FIX: the retry adapter was only mounted for http://, so https
    # URLs silently got no retries; mount it for both schemes.
    adapter = HTTPAdapter(max_retries=retries)
    s.mount('http://', adapter)
    s.mount('https://', adapter)

    # BUG FIX: the Timeout object above was built but never used — the
    # call previously passed the bare number 100 instead.
    return s.get(url, params=params,
                 timeout=timeout).content.strip(b"\"\n").replace(b"\\n", b"\n")
예제 #9
0
    def _test_head_cachecontrol(self):
        """Walk a CacheControl session through HEAD/GET interleavings to
        show how caching HEAD responses pollutes later requests: once the
        302 is cached, even a fresh HEAD is served the cached GET body.
        """
        # cache polluted by HEAD
        url = "http://gpodder.org/"

        class MoreCodesCacheController(CacheController):
            # Widen the set of cacheable status codes so redirects (301,
            # 302) and 404s are stored alongside the defaults.
            def __init__(self, *args, **kwargs):
                kwargs["status_codes"] = (200, 203, 300, 301, 302, 404)
                super(MoreCodesCacheController, self).__init__(*args, **kwargs)

        # In-memory cache with a one-day heuristic; HEAD is explicitly
        # made cacheable in addition to GET.
        cache = DictCache()
        s = CacheControl(
            requests.Session(),
            cache=cache,
            heuristic=OneDayCache(),
            cacheable_methods=["GET", "HEAD"],
            controller_class=MoreCodesCacheController,
        )
        # First HEAD: live 302, nothing cached yet.
        r = s.head(url, timeout=Timeout(15))
        self.assertEqual(r.status_code, 302)
        self.assertEqual(r.url, url)
        self.assertEqual(r.headers["location"], "https://gpodder.github.io")
        self.assertFalse(r.content)
        self.assertFalse(r.from_cache)

        # First GET without following the redirect: live 302 with a body.
        r = s.get(url, timeout=Timeout(15), allow_redirects=False)
        self.assertEqual(r.status_code, 302)
        self.assertEqual(r.url, url)
        self.assertEqual(r.headers["location"], "https://gpodder.github.io")
        self.assertTrue(r.content)
        self.assertIn(b"302 Found", r.content)
        self.assertFalse(r.from_cache)

        # Re-fetch the redirect from the cache
        r = s.get(url, timeout=Timeout(15), allow_redirects=False)
        self.assertEqual(r.status_code, 302)
        self.assertEqual(r.url, url)
        self.assertEqual(r.headers["location"], "https://gpodder.github.io")
        self.assertTrue(r.content)
        self.assertIn(b"302 Found", r.content)

        self.assertTrue(r.from_cache)

        # Now allow redirects

        # Following the redirect lands on a different URL; that final
        # response is live, not cached.
        r = s.get(url, timeout=Timeout(15))
        self.assertEqual(r.status_code, 200)
        self.assertNotEqual(r.url, url)
        self.assertTrue(r.content)
        self.assertFalse(r.from_cache)

        # Re-fetch the redirect from the cache
        r = s.get(url, timeout=Timeout(15), allow_redirects=False)
        self.assertEqual(r.status_code, 302)
        self.assertEqual(r.url, url)
        self.assertEqual(r.headers["location"], "https://gpodder.github.io")
        self.assertTrue(r.content)
        self.assertIn(b"302 Found", r.content)

        self.assertTrue(r.from_cache)

        # Re-fetch the head from the cache
        # The pollution: HEAD now returns the cached GET entry, complete
        # with a body a HEAD response should never have.
        r = s.head(url, timeout=Timeout(15))
        self.assertEqual(r.status_code, 302)
        self.assertEqual(r.url, url)
        self.assertEqual(r.headers["location"], "https://gpodder.github.io")
        self.assertTrue(r.content)
        self.assertIn(b"302 Found", r.content)
        self.assertTrue(r.from_cache)
예제 #10
0
 def _test_timeout_moksha_requests_get(self):
     # Plain requests.get (no downgrade/cache logic) reaches the parked
     # domain without timing out.
     target = "https://mokshaproject.net"
     response = requests.get(target, timeout=Timeout(5))
     self.assertEqual(response.status_code, 200)
     self.assertEqual(response.url, target)
예제 #11
0
 def _test_timeout_moksha_requests_session(self):
     # A plain requests.Session also reaches the parked domain fine.
     target = "https://mokshaproject.net"
     plain_session = requests.Session()
     response = plain_session.get(target, timeout=Timeout(5))
     self.assertEqual(response.status_code, 200)
     self.assertEqual(response.url, target)
예제 #12
0
class HTTPSAdapter(RedirectAdapter):
    """Transport adapter that steers URLs between http and https.

    Hosts whose URL tail matches an entry in ``https_exceptions`` (or
    starts with a digit, i.e. looks like a bare IP) are kept on plain
    http; other ``http://`` URLs are probed and upgraded to https when
    the server does not redirect elsewhere.
    """

    # Tight limits for the probe requests issued in get_redirect() and
    # for the http fallback retry in ignore_handle_error().
    _head_timeout = Timeout(connect=10, read=5)

    def __init__(self, *args, **kwargs):
        # URL tails (host[/path] prefixes) that must never use https.
        https_exceptions = kwargs.pop("https_exceptions", [])
        super(HTTPSAdapter, self).__init__(*args, **kwargs)
        self._https_exceptions = https_exceptions

    def ignore_handle_error(self, exc, request=None):
        """Retry an https request over plain http when the host is on the
        exception list; otherwise re-raise *exc*."""
        if not request:
            raise exc

        if request.url.startswith("https"):
            tail = request.url[8:]
            if self.prevent_https(tail):
                # BUG FIX: use lazy %-formatting — the old call passed
                # extra positional args with no placeholders, which makes
                # the logging module raise a formatting error.
                logger.info("downgrading %s to http: %r", request.url, exc)
                new_request = request.copy()
                new_request.url = "http://" + tail
                # BUG FIX: resend the downgraded copy; previously the
                # unmodified https request was resent, so the downgrade
                # never took effect.
                return self.send(new_request, timeout=self._head_timeout)

        raise exc

    def block_redirect(self, from_url, to_url):
        # Hook for subclasses: return True to defer to the parent's
        # redirect handling, a status code to synthesize a response, or
        # None/False to allow the redirect. Default allows everything.
        pass

    def prevent_https(self, tail):
        """Return True when the URL *tail* must stay on plain http."""
        if tail[0].isdigit():  # TODO: detect IP
            return True
        for rule in self._https_exceptions:
            if tail.startswith(rule):
                return True
        return False

    def send(self, request, *args, **kwargs):
        """Send *request*, first downgrading exempt https URLs to http;
        transport errors are routed through handle_error()."""
        if request.url.startswith("https://"):
            tail = request.url[8:]
            if self.prevent_https(tail):
                logger.info("downgraded {} to http".format(request.url))
                request.url = "http://" + tail

        try:
            resp = super(HTTPSAdapter, self).send(request, *args, **kwargs)
        except Exception as e:
            resp = self.handle_error(e, request)
        return resp

    def get_redirect(self, url):
        """Resolve where *url* should go, upgrading http to https.

        Probes the URL with HEAD (GET for code.google.com — presumably
        HEAD is unreliable there; confirm) without following redirects,
        and returns the parent's answer, a synthesized/probe response,
        or the https form of *url*.
        """
        if not url.startswith("http://"):
            return super(HTTPSAdapter, self).get_redirect(url)

        tail = url[7:]
        if self.prevent_https(tail):
            logger.info("https blocked for {}".format(url))
            return super(HTTPSAdapter, self).get_redirect(url)

        # NOTE(review): every path through this loop body breaks or
        # returns, so at most one redirect hop is followed — confirm
        # whether multi-hop chains were intended.
        while True:
            current_url = url
            try:
                if current_url.startswith("http://code.google.com"):
                    resp = requests.get(
                        current_url, allow_redirects=False, timeout=self._head_timeout
                    )
                else:
                    resp = requests.head(
                        current_url, allow_redirects=False, timeout=self._head_timeout
                    )
                resp.raise_for_status()
            except Exception as e:
                # Probe failed; fall through with url unchanged.
                logger.info("head failed for {}: {!r}".format(current_url, e))
                break
            else:
                logger.debug(
                    "head {} {} {} {} {}".format(
                        current_url, resp.url, resp, resp.headers, resp.content
                    )
                )
                location = resp.headers.get("location")
                if location and location != current_url:
                    # Give block_redirect a veto for both the immediate
                    # hop and the original URL.
                    code = self.block_redirect(current_url, location)
                    if code is True:
                        return super(HTTPSAdapter, self).get_redirect(url)
                    elif code:
                        return _generate_response(code)

                    code = self.block_redirect(url, location)
                    if code is True:
                        return super(HTTPSAdapter, self).get_redirect(url)
                    elif code:
                        return _generate_response(code)

                    if location.startswith("https:"):
                        tail = location[8:]
                        if self.prevent_https(tail):
                            return super(HTTPSAdapter, self).get_redirect(url)
                        return resp
                    url = location

                break

        tail = url[7:]
        if self.prevent_https(tail):
            logger.info("https blocked for {}".format(url))
            return super(HTTPSAdapter, self).get_redirect(url)
        # "http://x" -> "https://x"
        return "https" + url[4:]
예제 #13
0
def _make_request(self,
                  conn,
                  method,
                  url,
                  timeout=Timeout.from_float(2),
                  chunked=False,
                  **httplib_request_kw):
    """Perform *method* on *url* over the low-level connection *conn* and
    return the raw httplib response.

    NOTE(review): this appears to be a vendored copy of urllib3's
    ``HTTPConnectionPool._make_request`` with two local changes — the
    default timeout is ``Timeout.from_float(2)`` rather than urllib3's
    sentinel, and the remote peer address is attached to the response as
    ``httplib_response.peer`` — confirm against the upstream source.

    :param conn: an open httplib-compatible connection.
    :param method: HTTP method string, e.g. ``"GET"``.
    :param url: request target path/URL passed to ``conn.request``.
    :param timeout: number or urllib3 ``Timeout``; defaults to 2 seconds.
    :param chunked: if True, send the body with chunked transfer-encoding.
    :returns: the httplib response, with a ``peer`` attribute added.
    """
    self.num_requests += 1
    timeout_obj = self._get_timeout(timeout)
    timeout_obj.start_connect()
    conn.timeout = timeout_obj.connect_timeout

    # Trigger any extra validation we need to do.
    try:
        self._validate_conn(conn, **httplib_request_kw)
    except (SocketTimeout, BaseSSLError) as e:
        # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
        self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
        raise

    # conn.request() calls httplib.*.request, not the method in
    # urllib3.request. It also calls makefile (recv) on the socket.
    if chunked:
        conn.request_chunked(method, url, **httplib_request_kw)
    else:
        conn.request(method, url, **httplib_request_kw)

    # Reset the timeout for the recv() on the socket
    read_timeout = timeout_obj.read_timeout

    # App Engine doesn't have a sock attr
    if getattr(conn, 'sock', None):
        # In Python 3 socket.py will catch EAGAIN and return None when you
        # try and read into the file pointer created by http.client, which
        # instead raises a BadStatusLine exception. Instead of catching
        # the exception and assuming all BadStatusLine exceptions are read
        # timeouts, check for a zero timeout before making the request.
        if read_timeout == 0:
            raise ReadTimeoutError(
                self, url, "Read timed out. (read timeout=%s)" % read_timeout)
        if read_timeout is Timeout.DEFAULT_TIMEOUT:
            conn.sock.settimeout(socket.getdefaulttimeout())
        else:  # None or a value
            conn.sock.settimeout(read_timeout)

    # Receive the response from the server
    try:
        try:  # Python 2.7, use buffering of HTTP responses
            httplib_response = conn.getresponse(buffering=True)
        except TypeError:  # Python 2.6 and older, Python 3
            try:
                httplib_response = conn.getresponse()
            except Exception as e:
                # Remove the TypeError from the exception chain in Python 3;
                # otherwise it looks like a programming error was the cause.
                six.raise_from(e, None)
    except (SocketTimeout, BaseSSLError, SocketError) as e:
        self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
        raise

    # AppEngine doesn't have a version attr.
    http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
    log.debug("%s://%s:%s \"%s %s %s\" %s %s", self.scheme, self.host,
              self.port, method, url, http_version, httplib_response.status,
              httplib_response.length)

    try:
        assert_header_parsing(httplib_response.msg)
    except HeaderParsingError as hpe:  # Platform-specific: Python 3
        log.warning('Failed to parse headers (url=%s): %s',
                    self._absolute_url(url),
                    hpe,
                    exc_info=True)

    # Local extension: record the remote peer address on the response
    # (None when the platform exposes no socket, e.g. App Engine).
    sock = getattr(conn, 'sock', False)
    if sock:
        setattr(httplib_response, 'peer', sock.getpeername())
    else:
        setattr(httplib_response, 'peer', None)
    return httplib_response