Example #1
def test_safename():
    cases = (
        ('http://example.org/fred/?a=b',
            'example.org,fred,a=b,58489f63a7a83c3b7794a6a398ee8b1f'),
        ('http://example.org/fred?/a=b',
            'example.org,fred,a=b,8c5946d56fec453071f43329ff0be46b'),
        ('http://www.example.org/fred?/a=b',
            'www.example.org,fred,a=b,499c44b8d844a011b67ea2c015116968'),
        ('https://www.example.org/fred?/a=b',
            'www.example.org,fred,a=b,692e843a333484ce0095b070497ab45d'),
        (httplib2.urlnorm('http://WWW')[-1],
            httplib2.safename(httplib2.urlnorm('http://www')[-1])),
        (u'http://\u2304.org/fred/?a=b',
            'xn--http,-4y1d.org,fred,a=b,579924c35db315e5a32e3d9963388193'),
    )
    for a, b in cases:
        assert httplib2.safename(a) == b

    assert httplib2.safename('http://www') != httplib2.safename('https://www')

    # Test the max length limits
    uri = 'http://' + ('w' * 200) + '.org'
    uri2 = 'http://' + ('w' * 201) + '.org'
    assert httplib2.safename(uri) != httplib2.safename(uri2)
    # Max length should be 200 + 1 (',') + 32
    assert len(httplib2.safename(uri2)) == 233
    assert len(httplib2.safename(uri)) == 233
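The two length assertions encode the cache-file naming rule: a readable prefix capped at 200 characters, then a comma and a 32-character MD5 hex digest, for at most 200 + 1 + 32 = 233 characters. A minimal sketch of just that arithmetic (the character filtering safename also performs is omitted here):

import hashlib

def cache_key_length(uri):
    # Illustrative only: prefix capped at 200 chars, then ',' plus the MD5 hexdigest.
    prefix = uri[:200]
    digest = hashlib.md5(uri.encode('utf-8')).hexdigest()
    return len(prefix) + 1 + len(digest)  # at most 200 + 1 + 32 == 233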
Example #2
def TestOneInput(data):
    fdp = atheris.FuzzedDataProvider(data)
    original = fdp.ConsumeUnicode(sys.maxsize)
    try:
        httplib2.urlnorm(original)
    except httplib2.RelativeURIError:
        return
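This is an atheris fuzz harness; the excerpt omits the entry point. The usual boilerplate to drive it looks like this (standard atheris API):

import sys
import atheris
import httplib2

if __name__ == '__main__':
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()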
Example #3
 def test(self):
     self.assertEqual( "http://example.org/", httplib2.urlnorm("http://example.org")[-1])
     self.assertEqual( "http://example.org/", httplib2.urlnorm("http://EXAMple.org")[-1])
     self.assertEqual( "http://example.org/?=b", httplib2.urlnorm("http://EXAMple.org?=b")[-1])
     self.assertEqual( "http://example.org/mypath?a=b", httplib2.urlnorm("http://EXAMple.org/mypath?a=b")[-1])
     self.assertEqual( "http://localhost:80/", httplib2.urlnorm("http://localhost:80")[-1])
     self.assertEqual( httplib2.urlnorm("http://localhost:80/"), httplib2.urlnorm("HTTP://LOCALHOST:80"))
     try:
         httplib2.urlnorm("/")
         self.fail("Non-absolute URIs should raise an exception")
     except httplib2.RelativeURIError:
         pass
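The [-1] indexing works because httplib2.urlnorm returns a 4-tuple; a quick sketch of its shape, consistent with the other examples on this page:

# urlnorm returns (scheme, authority, request_uri, defrag_uri); the tests
# above take [-1], the normalized URI with any fragment removed.
scheme, authority, request_uri, defrag_uri = httplib2.urlnorm("HTTP://Example.ORG/path?a=b")
assert defrag_uri == "http://example.org/path?a=b"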
Example #4
    def testGet304(self):
        # Test that we use ETags properly to validate our cache
        uri = urllib.parse.urljoin(base, "304/test_etag.txt")
        (response, content) = self.http.request(uri, "GET")
        self.assertNotEqual(response['etag'], "")

        (response, content) = self.http.request(uri, "GET")
        (response, content) = self.http.request(uri, "GET", headers = {'cache-control': 'must-revalidate'})
        self.assertEqual(response.status, 200)
        self.assertEqual(response.fromcache, True)

        cache_file_name = os.path.join(cacheDirName, httplib2.safename(httplib2.urlnorm(uri)[-1]))
        f = open(cache_file_name, "r")
        status_line = f.readline()
        f.close()

        self.assertTrue(status_line.startswith("status:"))

        (response, content) = self.http.request(uri, "HEAD")
        self.assertEqual(response.status, 200)
        self.assertEqual(response.fromcache, True)

        (response, content) = self.http.request(uri, "GET", headers = {'range': 'bytes=0-0'})
        self.assertEqual(response.status, 206)
        self.assertEqual(response.fromcache, False)
Example #5
    def _follow_redirect(self, uri, method, body, headers, response, content, max_redirects):
        """ Internal function to follow a redirect recieved by L{request} """
        (scheme, authority, absolute_uri, defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
        if self.cache:
            cachekey = defrag_uri
        else:
            cachekey = None

        # Pick out the location header and basically start from the beginning
        # remembering first to strip the ETag header and decrement our 'depth'
        if not response.has_key('location') and response.status != 300:
            raise httplib2.RedirectMissingLocation("Redirected but the response is missing a Location: header.", response, content)
        # Fix-up relative redirects (which violate an RFC 2616 MUST)
        if response.has_key('location'):
            location = response['location']
            (scheme, authority, path, query, fragment) = httplib2.parse_uri(location)
            if authority is None:
                response['location'] = httplib2.urlparse.urljoin(uri, location)
                logging.debug('Relative redirect: changed [%s] to [%s]' % (location, response['location']))
        if response.status == 301 and method in ["GET", "HEAD"]:
            response['-x-permanent-redirect-url'] = response['location']
            if not response.has_key('content-location'):
                response['content-location'] = absolute_uri 
            httplib2._updateCache(headers, response, content, self.cache, cachekey)
        
        headers.pop('if-none-match', None)
        headers.pop('if-modified-since', None)
        
        if response.has_key('location'):
            location = response['location']
            redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method
            return self.request(location, redirect_method, body=body, headers = headers, max_redirects = max_redirects - 1)
        else:
            raise httplib2.RedirectLimit("Redirected more times than redirection_limit allows.", response, content)
Example #6
    def request(self, uri, method="GET", body=None, headers=None,
                max_redirects=None, connection_type=None):
        """Start an HTTP request.

        @param uri: The uri to retrieve
        @param method: (optional) The HTTP method to use. Default is 'GET'
        @param body: (optional) The request body. Default is no body.
        @param headers: (optional) Additional headers to send. Defaults
               include C{connection: keep-alive}, C{user-agent} and
               C{content-type}.
        @param max_redirects: (optional) The maximum number of redirects to
               use for this request. The class instance's max_redirects is
               the default.
        @param connection_type: (optional) see L{httplib2.Http.request}

        @return: (response, content) tuple

        """
        if max_redirects is None:
            max_redirects = self.max_redirects
        if headers is None:
            headers = {}
        # Prepare headers
        headers.pop('cookie', None)
        req = DummyRequest(uri, headers)
        self.cookiejar.lock.acquire()
        try:
            self.cookiejar.add_cookie_header(req)
        finally:
            self.cookiejar.lock.release()
        headers = req.headers

        # Wikimedia squids: add connection: keep-alive to request headers
        # unless overridden
        headers['connection'] = headers.pop('connection', 'keep-alive')

        # determine connection pool key and fetch connection
        (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(
                                                        httplib2.iri2uri(uri))
        conn_key = scheme+":"+authority

        connection = self.connection_pool.pop_connection(conn_key)
        if connection is not None:
            self.connections[conn_key] = connection

        # Redirect hack: we want to regulate redirects
        follow_redirects = self.follow_redirects
        self.follow_redirects = False
        pywikibot.debug(u"%r" % (
                            (uri.replace("%7C","|"), method, body,
                            headers, max_redirects,
                            connection_type),),
                        _logger)
        try:
            (response, content) = httplib2.Http.request(
                                    self, uri, method, body, headers,
                                    max_redirects, connection_type)
        except Exception, e: # what types?
            # return exception instance to be retrieved by the calling thread
            return e
Example #7
def test_etag_used():
    # Test that we use ETags properly to validate our cache
    cache_path = tests.get_cache_path()
    http = httplib2.Http(cache=cache_path)
    response_kwargs = dict(
        add_date=True,
        add_etag=True,
        body=b'something',
        headers={
            'cache-control': 'public,max-age=300',
        },
    )

    def handler(request):
        if request.headers.get('range'):
            return tests.http_response_bytes(status=206, **response_kwargs)
        return tests.http_response_bytes(**response_kwargs)

    with tests.server_request(handler, request_count=2) as uri:
        response, _ = http.request(uri,
                                   'GET',
                                   headers={'accept-encoding': 'identity'})
        assert response['etag'] == '"437b930db84b8079c2dd804a71936b5f"'

        http.request(uri, 'GET', headers={'accept-encoding': 'identity'})
        response, _ = http.request(
            uri,
            'GET',
            headers={
                'accept-encoding': 'identity',
                'cache-control': 'must-revalidate'
            },
        )
        assert response.status == 200
        assert response.fromcache

        # TODO: API to read cache item, at least internal to tests
        cache_file_name = os.path.join(
            cache_path, httplib2.safename(httplib2.urlnorm(uri)[-1]))
        with open(cache_file_name, 'r') as f:
            status_line = f.readline()
        assert status_line.startswith("status:")

        response, content = http.request(
            uri, 'HEAD', headers={'accept-encoding': 'identity'})
        assert response.status == 200
        assert response.fromcache

        response, content = http.request(uri,
                                         'GET',
                                         headers={
                                             'accept-encoding': 'identity',
                                             'range': 'bytes=0-0'
                                         })
        assert response.status == 206
        assert not response.fromcache
Example #8
 def _getCachedHeader(self, uri, header):
     """Retrieve a cached value for an HTTP header."""
     (scheme, authority, request_uri, cachekey) = urlnorm(uri)
     cached_value = self.get(cachekey)
     header_start = header + ':'
     if cached_value is not None:
         for line in StringIO(cached_value):
             if line.startswith(header_start):
                 return line[len(header_start):].strip()
     return None
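For context, the cached value this helper scans is a block of "name: value" header lines followed by a blank line and the body; a tiny illustration (the entry shown is made up):

# Hypothetical cache entry: header block, blank line, body.
cached_value = 'status: 200\r\netag: "abc"\r\n\r\n<body>'
# Scanning its lines for the 'etag:' prefix, as above, yields '"abc"'.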
Example #9
def test_norm():
    cases = (
        ('http://example.org', 'http://example.org/'),
        ('http://EXAMple.org', 'http://example.org/'),
        ('http://EXAMple.org?=b', 'http://example.org/?=b'),
        ('http://EXAMple.org/mypath?a=b', 'http://example.org/mypath?a=b'),
        ('http://localhost:80', 'http://localhost:80/'),
    )
    for a, b in cases:
        assert httplib2.urlnorm(a)[-1] == b

    assert httplib2.urlnorm('http://localhost:80/') == httplib2.urlnorm(
        'HTTP://LOCALHOST:80')

    try:
        httplib2.urlnorm('/')
        assert False, 'Non-absolute URIs should raise an exception'
    except httplib2.RelativeURIError:
        pass
Example #10
 def _getCachedHeader(self, uri, header):
     """Retrieve a cached value for an HTTP header."""
     (scheme, authority, request_uri, cachekey) = urlnorm(uri)
     cached_value = self.get(cachekey)
     header_start = header + ':'
     if cached_value is not None:
         for line in StringIO(cached_value):
             if line.startswith(header_start):
                 return line[len(header_start):].strip()
     return None
Example #11
def test_norm():
    cases = (
        ("http://example.org", "http://example.org/"),
        ("http://EXAMple.org", "http://example.org/"),
        ("http://EXAMple.org?=b", "http://example.org/?=b"),
        ("http://EXAMple.org/mypath?a=b", "http://example.org/mypath?a=b"),
        ("http://localhost:80", "http://localhost:80/"),
    )
    for a, b in cases:
        assert httplib2.urlnorm(a)[-1] == b

    assert httplib2.urlnorm("http://localhost:80/") == httplib2.urlnorm(
        "HTTP://LOCALHOST:80")

    try:
        httplib2.urlnorm("/")
        assert False, "Non-absolute URIs should raise an exception"
    except httplib2.RelativeURIError:
        pass
Example #12
def test_norm():
    cases = (
        ("http://example.org", "http://example.org/"),
        ("http://EXAMple.org", "http://example.org/"),
        ("http://EXAMple.org?=b", "http://example.org/?=b"),
        ("http://EXAMple.org/mypath?a=b", "http://example.org/mypath?a=b"),
        ("http://localhost:80", "http://localhost:80/"),
    )
    for a, b in cases:
        assert httplib2.urlnorm(a)[-1] == b

    assert httplib2.urlnorm("http://localhost:80/") == httplib2.urlnorm(
        "HTTP://LOCALHOST:80"
    )

    try:
        httplib2.urlnorm("/")
        assert False, "Non-absolute URIs should raise an exception"
    except httplib2.RelativeURIError:
        pass
Example #13
 def _get_from_cache(self, url):
     """ get a given url from the cachedir even if its expired
         or return None if no data is available
     """
     scheme = urlparse(url).scheme
     if self._http[scheme].cache:
         cached_value = self._http[scheme].cache.get(
             httplib2.urlnorm(url)[-1])
         if cached_value:
             info, content = cached_value.split('\r\n\r\n', 1)
             return content
Example #14
 def _get_from_cache(self, url):
     """ get a given url from the cachedir even if its expired
         or return None if no data is available
     """
     http = httplib2.Http(cache=self._cachedir)
     if http.cache:
         cached_value = http.cache.get(httplib2.urlnorm(url)[-1])
         if cached_value:
             info, content = cached_value.split('\r\n\r\n', 1)
             return content
     return None
Example #15
 def _get_from_cache(self, url):
     """ get a given url from the cachedir even if its expired
         or return None if no data is available
     """
     scheme = urlparse(url).scheme
     if self._http[scheme].cache:
         cached_value = self._http[scheme].cache.get(
             httplib2.urlnorm(url)[-1])
         if cached_value:
             info, content = cached_value.decode("utf-8").split(
                 '\r\n\r\n', 1)
             return content
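This variant decodes first because on Python 3 the cache returns bytes; the split itself is unchanged. A small sketch:

# Decode the bytes entry, then split the header block from the body
# at the first blank line:
cached_value = b'status: 200\r\ncontent-type: text/plain\r\n\r\nhello'
info, content = cached_value.decode('utf-8').split('\r\n\r\n', 1)
assert content == 'hello'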
Example #16
 def _getCachedHeader(self, uri, header):
     """Retrieve a cached value for an HTTP header."""
     (scheme, authority, request_uri, cachekey) = urlnorm(uri)
     cached_value = self.get(cachekey)
     header_start = header + ':'
     if not isinstance(header_start, bytes):
         header_start = header_start.encode('utf-8')
     if cached_value is not None:
         for line in BytesIO(cached_value):
             if line.startswith(header_start):
                 return line[len(header_start):].strip()
     return None
Example #17
def test_norm():
    cases = (
        ('http://example.org',
            'http://example.org/'),
        ('http://EXAMple.org',
            'http://example.org/'),
        ('http://EXAMple.org?=b',
            'http://example.org/?=b'),
        ('http://EXAMple.org/mypath?a=b',
            'http://example.org/mypath?a=b'),
        ('http://localhost:80',
            'http://localhost:80/'),
    )
    for a, b in cases:
        assert httplib2.urlnorm(a)[-1] == b

    assert httplib2.urlnorm('http://localhost:80/') == httplib2.urlnorm('HTTP://LOCALHOST:80')

    try:
        httplib2.urlnorm('/')
        assert False, 'Non-absolute URIs should raise an exception'
    except httplib2.RelativeURIError:
        pass
Example #18
    def _follow_redirect(self, uri, method, body, headers, response, content,
                         max_redirects):
        """Internal function to follow a redirect recieved by L{request}"""
        (scheme, authority, absolute_uri,
         defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
        if self.cache:
            cachekey = defrag_uri
        else:
            cachekey = None

        # Pick out the location header and basically start from the beginning
        # remembering first to strip the ETag header and decrement our 'depth'
        if "location" not in response and response.status != 300:
            raise httplib2.RedirectMissingLocation(
                "Redirected but the response is missing a Location: header.",
                response, content)
        # Fix-up relative redirects (which violate an RFC 2616 MUST)
        if "location" in response:
            location = response['location']
            (scheme, authority, path, query,
             fragment) = httplib2.parse_uri(location)
            if authority is None:
                response['location'] = httplib2.urlparse.urljoin(uri, location)
                pywikibot.debug(
                    u"Relative redirect: changed [%s] to [%s]" %
                    (location, response['location']), _logger)
        if response.status == 301 and method in ["GET", "HEAD"]:
            response['-x-permanent-redirect-url'] = response['location']
            if "content-location" not in response:
                response['content-location'] = absolute_uri
            httplib2._updateCache(headers, response, content, self.cache,
                                  cachekey)

        headers.pop('if-none-match', None)
        headers.pop('if-modified-since', None)

        if "location" in response:
            location = response['location']
            redirect_method = (
                (response.status == 303) and
                (method not in ["GET", "HEAD"])) and "GET" or method
            return self.request(location,
                                redirect_method,
                                body=body,
                                headers=headers,
                                max_redirects=max_redirects - 1)
        else:
            raise httplib2.RedirectLimit(
                "Redirected more times than redirection_limit allows.",
                response, content)
Example #19
def test_etag_used():
    # Test that we use ETags properly to validate our cache
    cache_path = tests.get_cache_path()
    http = httplib2.Http(cache=cache_path)
    response_kwargs = dict(
        add_date=True,
        add_etag=True,
        body=b"something",
        headers={"cache-control": "public,max-age=300"},
    )

    def handler(request):
        if request.headers.get("range"):
            return tests.http_response_bytes(status=206, **response_kwargs)
        return tests.http_response_bytes(**response_kwargs)

    with tests.server_request(handler, request_count=2) as uri:
        response, _ = http.request(uri, "GET", headers={"accept-encoding": "identity"})
        assert response["etag"] == '"437b930db84b8079c2dd804a71936b5f"'

        http.request(uri, "GET", headers={"accept-encoding": "identity"})
        response, _ = http.request(
            uri,
            "GET",
            headers={"accept-encoding": "identity", "cache-control": "must-revalidate"},
        )
        assert response.status == 200
        assert response.fromcache

        # TODO: API to read cache item, at least internal to tests
        cache_file_name = os.path.join(
            cache_path, httplib2.safename(httplib2.urlnorm(uri)[-1])
        )
        with open(cache_file_name, "r") as f:
            status_line = f.readline()
        assert status_line.startswith("status:")

        response, content = http.request(
            uri, "HEAD", headers={"accept-encoding": "identity"}
        )
        assert response.status == 200
        assert response.fromcache

        response, content = http.request(
            uri, "GET", headers={"accept-encoding": "identity", "range": "bytes=0-0"}
        )
        assert response.status == 206
        assert not response.fromcache
Example #20
 def test(self):
     # Test that different URIs end up generating different safe names
     self.assertEqual( "example.org,fred,a=b,58489f63a7a83c3b7794a6a398ee8b1f", httplib2.safename("http://example.org/fred/?a=b"))
     self.assertEqual( "example.org,fred,a=b,8c5946d56fec453071f43329ff0be46b", httplib2.safename("http://example.org/fred?/a=b"))
     self.assertEqual( "www.example.org,fred,a=b,499c44b8d844a011b67ea2c015116968", httplib2.safename("http://www.example.org/fred?/a=b"))
     self.assertEqual( httplib2.safename(httplib2.urlnorm("http://www")[-1]), httplib2.safename(httplib2.urlnorm("http://WWW")[-1]))
     self.assertEqual( "www.example.org,fred,a=b,692e843a333484ce0095b070497ab45d", httplib2.safename("https://www.example.org/fred?/a=b"))
     self.assertNotEqual( httplib2.safename("http://www"), httplib2.safename("https://www"))
     # Test the max length limits
     uri = "http://" + ("w" * 200) + ".org"
     uri2 = "http://" + ("w" * 201) + ".org"
     self.assertNotEqual( httplib2.safename(uri2), httplib2.safename(uri))
     # Max length should be 200 + 1 (",") + 32
     self.assertEqual(233, len(httplib2.safename(uri2)))
     self.assertEqual(233, len(httplib2.safename(uri)))
     # Unicode
      if sys.version_info >= (2, 3):
          self.assertEqual( "xn--http,-4y1d.org,fred,a=b,579924c35db315e5a32e3d9963388193", httplib2.safename(u"http://\u2304.org/fred/?a=b"))
Example #21
def send_http_request(method, request_url, body=None, request_headers=None):
    # Avoid a shared mutable default argument for the headers dict.
    if request_headers is None:
        request_headers = {}

    uri = httplib2.iri2uri(request_url)
    (scheme, authority, request_uri) = httplib2.urlnorm(uri)[:3]
    address = _get_hostport(authority)
    http_client = httplib2.HTTPConnectionWithTimeout(address[0], port=address[1])
    if http_client.sock is None:
        http_client.connect()

    http_client.putrequest(method,
                           request_uri.encode(DEFAULT_HTTP_URI_CHARSET),
                           {'skip_host': 1, 'skip_accept_encoding': 1})

    for key, value in request_headers.items():
        http_client.putheader(key, value.encode(DEFAULT_HTTP_HEADER_CHARSET))
    http_client.endheaders()
    if body:
        http_client.send(body)
    return http_client.getresponse()
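A hypothetical call, assuming DEFAULT_HTTP_URI_CHARSET, DEFAULT_HTTP_HEADER_CHARSET and _get_hostport are defined elsewhere in the same module:

# Returns the connection's raw response object (see getresponse above).
resp = send_http_request('GET', 'http://example.org/', request_headers={'accept': '*/*'})
print(resp.status)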
Example #22
 def make_request(self, request, host):
     if request.proxy_uri in self.fm.form_headers:
         h = self.fm.form_headers.pop(request.proxy_uri)
         h.pop('content-length')
         request.headers.update(h)
     uri = request.proxy_uri.replace(request.host, host, 1)
     headers = self.clean_request_headers(request, host)
     headers['host'] = host[host.rindex('/')+1:]
      connection_type = None
      (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(uri)
      if self.http.use_http_proxy and scheme == 'http':
          connection_type = HTTPProxyConnectionWithTimeout
          if self.http.use_http_proxy_auth:
              headers['proxy-authorization'] = self.http.http_proxy_auth
     resp, response = self.http.request(uri, method=request.method, body=str(request.body),
                                        headers=headers,connection_type=connection_type)
     self.set_response_headers(resp, response, request.host, host)
     response.request = request
     # print resp.status, uri
     return response
Example #23
 def dump(self):
     lines = ["*** HTTP Call Info ***\n"]
     uriNorm = httplib2.urlnorm(self.callInfo['uri'])
     lines.append("{0} {1} {2}/1.1\n".format(self.callInfo['method'], uriNorm[2], uriNorm[0].upper()))
      if self.callInfo['headers'] is not None:
          for chName, chValue in self.callInfo['headers']:
              lines.append("{0} : {1}\n".format(chName, chValue))
      else:
          lines.append("No request headers sent\n")

      lines.append("\n")
      lines.append(self.callBody)
     
     lines.append("\n*** HTTP Response Info ***\n")
     lines.append("HTTP/1.1 {0} {1}\n".format(self.responseInfo.status, self.responseInfo.reason))
     if self.responseHeaders is not None:
         for rhName, rhValue in self.responseHeaders.iteritems():
             lines.append("{0} : {1}\n".format(rhName, rhValue))
         
     lines.append("\n")    
     lines.append(self.responseBody)
     
     return ''.join(lines)
Example #24
    def request(self, uri, method="GET", body=None, headers=None,
                max_redirects=None, connection_type=None):
        """Start an HTTP request.

        @param uri: The uri to retrieve
        @param method: (optional) The HTTP method to use. Default is 'GET'
        @param body: (optional) The request body. Default is no body.
        @param headers: (optional) Additional headers to send. Defaults
               include C{connection: keep-alive}, C{user-agent} and
               C{content-type}.
        @param max_redirects: (optional) The maximum number of redirects to
               use for this request. The class instance's max_redirects is
               the default.
        @param connection_type: (optional) see L{httplib2.Http.request}

        @return: (response, content) tuple

        """
        if max_redirects is None:
            max_redirects = self.max_redirects
        if headers is None:
            headers = {}
        # Prepare headers
        headers.pop('cookie', None)
        req = DummyRequest(uri, headers)
        self.cookiejar.add_cookie_header(req)

        headers = req.headers

        # Wikimedia squids: add connection: keep-alive to request headers
        # unless overridden
        headers['connection'] = headers.pop('connection', 'keep-alive')

        # determine connection pool key and fetch connection
        (scheme, authority, request_uri,
         defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
        conn_key = scheme + ":" + authority

        connection = self.connection_pool.pop_connection(conn_key)
        if connection is not None:
            self.connections[conn_key] = connection

        # Redirect hack: we want to regulate redirects
        follow_redirects = self.follow_redirects
        self.follow_redirects = False
        pywikibot.debug(u"%r" % (
            (uri.replace("%7C", "|"), method, body,
             headers, max_redirects,
             connection_type),
        ), _logger)
        try:
            if authority in config.authenticate:
                self.add_credentials(*config.authenticate[authority])

            (response, content) = httplib2.Http.request(
                self, uri, method, body, headers,
                max_redirects, connection_type
            )
        except Exception as e:  # what types?
            # return exception instance to be retrieved by the calling thread
            return e
        finally:
            self.follow_redirects = follow_redirects

        # return connection to pool
        self.connection_pool.push_connection(conn_key,
                                             self.connections[conn_key])
        del self.connections[conn_key]

        # First write cookies
        self.cookiejar.extract_cookies(DummyResponse(response), req)

        # Check for possible redirects
        redirectable_response = ((response.status == 303) or
                                 (response.status in [300, 301, 302, 307] and
                                  method in ["GET", "HEAD"]))
        if (self.follow_redirects and (max_redirects > 0) and
                redirectable_response):
            # Return directly and not unpack the values in case the result was
            # an exception, which can't be unpacked
            return self._follow_redirect(
                uri, method, body, headers, response, content, max_redirects)
        else:
            return response, content
Example #25
 def request(self, uri, method="GET", body=None, headers=None, max_redirects=None, connection_type=None):
     """ Starts an HTTP request.
         @param uri: The uri to retrieve
         @param method: (optional) The HTTP method to use. Default is 'GET'
         @param body: (optional) The request body. Default is no body.
         @param headers: (optional) Additional headers to send. Defaults include 
                         C{connection: keep-alive}, C{user-agent} and C{content-type}.
         @param max_redirects: (optional) The maximum number of redirects to use for this request.
                              The class instance's max_redirects is the default.
         @param connection_type: (optional) ?
         @returns: (response, content) tuple
     """ 
     if max_redirects is None:
         max_redirects = self.max_redirects
     if headers is None:
         headers = {}
     # Prepare headers
     headers.pop('cookie', None)
     req = DummyRequest(uri, headers)
     self.cookiejar.lock.acquire()
     try:
         self.cookiejar.add_cookie_header(req)
     finally:
         self.cookiejar.lock.release()
     headers = req.headers
     
     # Wikimedia squids: add connection: keep-alive to request headers unless overridden
     headers['connection'] = headers.pop('connection', 'keep-alive')
     
     # determine connection pool key and fetch connection
     (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
     conn_key = scheme+":"+authority
     
     connection = self.connection_pool.pop_connection(conn_key)
     if connection is not None:
         self.connections[conn_key] = connection
     
      # Redirect hack: we want to regulate redirects
      follow_redirects = self.follow_redirects
      self.follow_redirects = False
      logging.debug('%r' % ((uri, method, headers, max_redirects, connection_type),))
      (response, content) = httplib2.Http.request(self, uri, method, body, headers, max_redirects, connection_type)
      self.follow_redirects = follow_redirects
     
     
     # return connection to pool
     self.connection_pool.push_connection(conn_key, self.connections[conn_key])
     del self.connections[conn_key]
             
     # First write cookies 
     self.cookiejar.lock.acquire()
     try:           
         self.cookiejar.extract_cookies(DummyResponse(response), req)
     finally:
         self.cookiejar.lock.release()
     
     # Check for possible redirects
     redirectable_response = ((response.status == 303) or
                              (response.status in [300, 301, 302, 307] and method in ["GET", "HEAD"]))
     if self.follow_redirects and (max_redirects > 0) and redirectable_response:
         (response, content) = self._follow_redirect(uri, method, body, headers, response, content, max_redirects)
     return (response, content)
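The explicit acquire/release pairs around the cookiejar calls (here and in the other pywikibot variants above) can equivalently be written with a with-statement, assuming the lock is a standard threading lock:

with self.cookiejar.lock:
    self.cookiejar.add_cookie_header(req)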
Example #26
    except httplib2.RelativeURIError:
        pass


@pytest.mark.parametrize(
    'data', (
        ('', ',d41d8cd98f00b204e9800998ecf8427e'),
        ('http://example.org/fred/?a=b',
            'example.orgfreda=b,58489f63a7a83c3b7794a6a398ee8b1f'),
        ('http://example.org/fred?/a=b',
            'example.orgfreda=b,8c5946d56fec453071f43329ff0be46b'),
        ('http://www.example.org/fred?/a=b',
            'www.example.orgfreda=b,499c44b8d844a011b67ea2c015116968'),
        ('https://www.example.org/fred?/a=b',
            'www.example.orgfreda=b,692e843a333484ce0095b070497ab45d'),
        (httplib2.urlnorm('http://WWW')[-1],
            httplib2.safename(httplib2.urlnorm('http://www')[-1])),
        (u'http://\u2304.org/fred/?a=b',
            '.orgfreda=b,ecaf0f97756c0716de76f593bd60a35e'),
        ('normal-resource-name.js', 'normal-resource-name.js,8ff7c46fd6e61bf4e91a0a1606954a54'),
        ('foo://dom/path/brath/carapath', 'dompathbrathcarapath,83db942781ed975c7a5b7c24039f8ca3'),
        ('with/slash', 'withslash,17cc656656bb8ce2411bd41ead56d176'),
        ('thisistoomuch' * 42, ('thisistoomuch' * 6) + 'thisistoomuc,c4553439dd179422c6acf6a8ac093eb6'),
        (u'\u043f\u0440', ',9f18c0db74a9734e9d18461e16345083'),
        (u'\u043f\u0440'.encode('utf-8'), ',9f18c0db74a9734e9d18461e16345083'),
        (b'column\tvalues/unstr.zip', 'columnvaluesunstr.zip,b9740dcd0553e11b526450ceb8f76683'),
    ), ids=str)
def test_safename(data):
    result = httplib2.safename(data[0])
    assert result == data[1]
Example #27
        assert False, 'Non-absolute URIs should raise an exception'
    except httplib2.RelativeURIError:
        pass


@pytest.mark.parametrize('data', (
    ('', ',d41d8cd98f00b204e9800998ecf8427e'),
    ('http://example.org/fred/?a=b',
     'example.orgfreda=b,58489f63a7a83c3b7794a6a398ee8b1f'),
    ('http://example.org/fred?/a=b',
     'example.orgfreda=b,8c5946d56fec453071f43329ff0be46b'),
    ('http://www.example.org/fred?/a=b',
     'www.example.orgfreda=b,499c44b8d844a011b67ea2c015116968'),
    ('https://www.example.org/fred?/a=b',
     'www.example.orgfreda=b,692e843a333484ce0095b070497ab45d'),
    (httplib2.urlnorm('http://WWW')[-1],
     httplib2.safename(httplib2.urlnorm('http://www')[-1])),
    (u'http://\u2304.org/fred/?a=b',
     '.orgfreda=b,ecaf0f97756c0716de76f593bd60a35e'),
    ('normal-resource-name.js',
     'normal-resource-name.js,8ff7c46fd6e61bf4e91a0a1606954a54'),
    ('foo://dom/path/brath/carapath',
     'dompathbrathcarapath,83db942781ed975c7a5b7c24039f8ca3'),
    ('with/slash', 'withslash,17cc656656bb8ce2411bd41ead56d176'),
    ('thisistoomuch' * 42,
     ('thisistoomuch' * 6) + 'thisistoomuc,c4553439dd179422c6acf6a8ac093eb6'),
    (u'\u043f\u0440', ',9f18c0db74a9734e9d18461e16345083'),
    (u'\u043f\u0440'.encode('utf-8'), ',9f18c0db74a9734e9d18461e16345083'),
    (b'column\tvalues/unstr.zip',
     'columnvaluesunstr.zip,b9740dcd0553e11b526450ceb8f76683'),
),
Example #28
     "example.orgfreda=b,58489f63a7a83c3b7794a6a398ee8b1f",
 ),
 (
     "http://example.org/fred?/a=b",
     "example.orgfreda=b,8c5946d56fec453071f43329ff0be46b",
 ),
 (
     "http://www.example.org/fred?/a=b",
     "www.example.orgfreda=b,499c44b8d844a011b67ea2c015116968",
 ),
 (
     "https://www.example.org/fred?/a=b",
     "www.example.orgfreda=b,692e843a333484ce0095b070497ab45d",
 ),
 (
     httplib2.urlnorm("http://WWW")[-1],
     httplib2.safename(httplib2.urlnorm("http://www")[-1]),
 ),
 (
     u"http://\u2304.org/fred/?a=b",
     ".orgfreda=b,ecaf0f97756c0716de76f593bd60a35e",
 ),
 (
     "normal-resource-name.js",
     "normal-resource-name.js,8ff7c46fd6e61bf4e91a0a1606954a54",
 ),
 (
     "foo://dom/path/brath/carapath",
     "dompathbrathcarapath,83db942781ed975c7a5b7c24039f8ca3",
 ),
 ("with/slash", "withslash,17cc656656bb8ce2411bd41ead56d176"),
Example #29
 ## Strip the error-causing line-feed ASCII char
 import urllib2
 image_url = ''.join(image_url.split('%0A'))
 ########################################################
 ############       Finally     #########################
 #####     Replace ALL url encoding % escapes    ########
 ###  TWICE TO ACCOUNT FOR EX. %2520 --> %20 --> ' '  ###
 
 #image_url  = image_url.replace('/Flat%2520Images/', '/Flat%20Images/')
 print image_url, ' URL'
 # image_url = urllib2.unquote(image_url)
 regex_validurl = re.compile(r'^http[s]?://.+?$', re.U)
 if regex_validurl.findall(image_url):
     import httplib2
     image_url = httplib2.urlnorm(httplib2.urllib.unquote(image_url))[-1]
 #image_url = urllib2.unquote(image_url)   #urllib2.unquote(image_url))
 ########################################################
 ########################################################
 
     print 'RRR'
     headers = {'User-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:33.0) Gecko/20100101 Firefox/33.0'}
     try:
         print image_url, destpath #.split('/' )[-1].replace('.jpg','_1200.jpg')
         #error_check = urllib.urlopen(image_url)
         #print error_check
         #urlcode_value = error_check.getcode()
         
         res = requests.get(image_url, stream=True, timeout=1, headers=headers)
         print 'ALMOST'
         urlcode_value = res.status_code
Example #30
 def request(self, uri, method="GET", body=None, headers=None,
             redirections=httplib2.DEFAULT_MAX_REDIRECTS):
     (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(uri)
     connection_type = (scheme == 'https') and HTTPSConnection or HTTPConnection
     return httplib2.Http.request(self, uri, method, body, headers,
                                  redirections, connection_type)
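The '(cond) and A or B' expression above is the pre-Python-2.5 conditional idiom; the modern equivalent is:

connection_type = HTTPSConnection if scheme == 'https' else HTTPConnection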
Example #31
def url_download_file(image_url,filepath,errdir=None):
    import urllib, os, io, cStringIO, requests

    ## Split Vendor # to try again on fail of full VENDOR_STYLE_NO
    url_split = image_url.split('/')[-1]
    url_split = url_split.split('-')[1:]
    url_split = '-'.join(url_split)
    url_parent = image_url.split('/')[:-1]
    url_parent = '/'.join(url_parent)
    backupurl = image_url.replace('admin.swisswatchintl.com/Z/', 'admin.swisswatchintl.com/H/')
    backup_spliturl = os.path.join(url_parent, url_split).replace('admin.swisswatchintl.com/Z/', 'admin.swisswatchintl.com/H/')
    headers = {'User-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:33.0) Gecko/20100101 Firefox/33.0'}

    error_check = requests.get(image_url, stream=True, timeout=1, headers=headers)
    urlcode_value = error_check.status_code  # status_code is an attribute, not a method
    print urlcode_value

    ### PRIMARY URL, AKA /Z/
    import httplib2
    image_url = httplib2.urlnorm(httplib2.urllib.unquote(image_url))[-1]
    print 'RRR'
    print image_url, filepath #.split('/' )[-1].replace('.jpg','_1200.jpg')
    res = requests.get(image_url, stream=True, timeout=1, headers=headers)
    print 'ALMOST'
    urlcode_value = res.status_code
    print urlcode_value
    if urlcode_value == 200:
        # res already holds the response for image_url fetched above
        with open(filepath, 'ab+') as f:
            f.write(res.content)
    elif urlcode_value == 404:

        ### Split URL, /Z/
        urlsplit = os.path.join(url_parent, url_split)
        error_check = requests.get(urlsplit, stream=True, timeout=1, headers=headers)
        split_urlcode_value = error_check.status_code

        ### Backup URL, AKA /H/
        error_check = requests.get(backupurl, stream=True, timeout=1, headers=headers)
        backup_urlcode_value = error_check.status_code

        ### BackupSplit
        error_check = requests.get(backup_spliturl, stream=True, timeout=1, headers=headers)
        backup_spliturlcode_value = error_check.status_code


        if split_urlcode_value == 200:
            res = requests.get(urlsplit, stream=True, timeout=1, headers=headers)
            with open(filepath, 'ab+') as f:
                f.write(res.content)
            # print "On 2nd Attempt, Retrieved: " + urlsplit + " ---> " + filepath

        elif backup_urlcode_value == 200:
            # urllib.urlretrieve(backupurl, filepath.replace('.jpg', '_H.jpg'))
            res = requests.get(backupurl, stream=True, timeout=1, headers=headers)
            with open(filepath, 'ab+') as f:
                f.write(res.content)
                f.close()
            #print "Downloaded URL {0} Finally on 3rd and Final Attempt with Error Code {1}".format(backupurl, backup_urlcode_value)
        elif backup_spliturlcode_value == 200:
            # urllib.urlretrieve(backup_spliturl, filepath.replace('.jpg', '_HH.jpg'))
            res = requests.get(backup_spliturl, stream=True, timeout=1, headers=headers)
            with open(filepath, 'ab+') as f:
                f.write(res.content)
                f.close()
            #print "Didnt Fail Downloading URL {0} even on 3rd and Final Attempt with Error Code {1}".format(backup_spliturl, backup_spliturlcode_value)
        else:
            #print "AWFUL Totally Failed Downloading URL {0} on 2nd Attempt with Error Code {1}".format(image_url, urlcode_value)
            # print "TERRIBLE Failed Downloading URL {0} even on 3rd and Final Attempt with Error Code {1}".format(backupurl, backup_urlcode_value)
            try:
                errdir=os.path.join('/mnt','Post_Complete/Complete_Archive/MARKETPLACE/SWI/ERRORS')
                try:
                    os.makedirs(errdir, 16877)
                except OSError:
                    pass
                colorstyle = filepath.split('/')[-1][:9]
                alt        = filepath.split('/')[-1].split('_')[-1][0]
                if alt.isdigit():
                    alt = str(alt)
                elif alt == 'a':
                    alt = str(alt)
                else:
                    alt = '1'
                try:
                    #info = cStringIO.StringIO()
                    with io.open(os.path.join(os.path.abspath(errdir), colorstyle + '_' + alt + '_error404.txt'), mode='wt+') as f:

                        info = "{0},{1},{2},{3}".format(str(colorstyle), str(alt), str(urlcode_value), str(image_url))
                        outtext = unicode(info, 'utf-8')
                        print outtext
                        print >>f, outtext
                        #info.flush()
                        #f.write(info.getvalue())
                        #f.write()
                        #info.close()
                        f.flush()
                        f.close()
                except AttributeError:
                    pass

            except OSError:
                pass

    else:
        print "{0} Error:\v {1} is not a valid URL".format(urlcode_value,image_url)
Example #32
    def request(self,
                uri,
                method="GET",
                body=None,
                headers=None,
                max_redirects=None,
                connection_type=None):
        """Start an HTTP request.

        @param uri: The uri to retrieve
        @param method: (optional) The HTTP method to use. Default is 'GET'
        @param body: (optional) The request body. Default is no body.
        @param headers: (optional) Additional headers to send. Defaults
               include C{connection: keep-alive}, C{user-agent} and
               C{content-type}.
        @param max_redirects: (optional) The maximum number of redirects to
               use for this request. The class instance's max_redirects is
               the default.
        @param connection_type: (optional) see L{httplib2.Http.request}

        @return: (response, content) tuple

        """
        if max_redirects is None:
            max_redirects = self.max_redirects
        if headers is None:
            headers = {}
        # Prepare headers
        headers.pop('cookie', None)
        req = DummyRequest(uri, headers)
        self.cookiejar.lock.acquire()
        try:
            self.cookiejar.add_cookie_header(req)
        finally:
            self.cookiejar.lock.release()
        headers = req.headers

        # Wikimedia squids: add connection: keep-alive to request headers
        # unless overridden
        headers['connection'] = headers.pop('connection', 'keep-alive')

        # determine connection pool key and fetch connection
        (scheme, authority, request_uri,
         defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
        conn_key = scheme + ":" + authority

        connection = self.connection_pool.pop_connection(conn_key)
        if connection is not None:
            self.connections[conn_key] = connection

        # Redirect hack: we want to regulate redirects
        follow_redirects = self.follow_redirects
        self.follow_redirects = False
        pywikibot.debug(
            u"%r" % ((uri.replace("%7C", "|"), method, body, headers,
                      max_redirects, connection_type), ), _logger)
        try:
            (response,
             content) = httplib2.Http.request(self, uri, method, body, headers,
                                              max_redirects, connection_type)
        except Exception, e:  # what types?
            # return exception instance to be retrieved by the calling thread
            return e
Example #33
     "example.orgfreda=b,58489f63a7a83c3b7794a6a398ee8b1f",
 ),
 (
     "http://example.org/fred?/a=b",
     "example.orgfreda=b,8c5946d56fec453071f43329ff0be46b",
 ),
 (
     "http://www.example.org/fred?/a=b",
     "www.example.orgfreda=b,499c44b8d844a011b67ea2c015116968",
 ),
 (
     "https://www.example.org/fred?/a=b",
     "www.example.orgfreda=b,692e843a333484ce0095b070497ab45d",
 ),
 (
     httplib2.urlnorm("http://WWW")[-1],
     httplib2.safename(httplib2.urlnorm("http://www")[-1]),
 ),
 (
     u"http://\u2304.org/fred/?a=b",
     ".orgfreda=b,ecaf0f97756c0716de76f593bd60a35e",
 ),
 (
     "normal-resource-name.js",
     "normal-resource-name.js,8ff7c46fd6e61bf4e91a0a1606954a54",
 ),
 (
     "foo://dom/path/brath/carapath",
     "dompathbrathcarapath,83db942781ed975c7a5b7c24039f8ca3",
 ),
 ("with/slash", "withslash,17cc656656bb8ce2411bd41ead56d176"),
Example #34
    def request(self, uri, method="GET", body=None, headers=None, redirections=httplib2.DEFAULT_MAX_REDIRECTS, connection_type=None):
        """request handler with thread safety hacked in"""
        try:
            if headers is None:
                headers = {}
            else:
                headers = httplib2._normalize_headers(headers)
            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s" % httplib2.__version__
            uri = httplib2.iri2uri(uri)
            (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(uri)
            domain_port = authority.split(":")[0:2]
            if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
                scheme = 'https'
                authority = domain_port[0]
            conn_key = scheme+":"+authority
            def get_conn(conn_key):
                if conn_key in self.connections:
                    conn = self.connections[conn_key]
                    if type(conn) is list:
                        # scan the pooled list for an idle (not busy) connection
                        for c in conn:
                            if not getattr(c, 'busy', True):
                                return c
                        return None
                    return conn
            conn = get_conn(conn_key)
            if conn is None:
                if not connection_type:
                    connection_type = (scheme == 'https') and httplib2.HTTPSConnectionWithTimeout or httplib2.HTTPConnectionWithTimeout
                certs = list(self.certificates.iter(authority))
                if scheme == 'https' and certs:
                    conn = connection_type(authority, key_file=certs[0][0],
                        cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
                    self.connections.setdefault(conn_key, []).append(conn)
                else:
                    conn = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
                    self.connections.setdefault(conn_key, []).append(conn) 
                conn.set_debuglevel(httplib2.debuglevel)
            conn.busy = True
            if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers:
                headers['accept-encoding'] = 'deflate, gzip'
            info = httplib2.email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    try:
                        info, content = cached_value.split('\r\n\r\n', 1)
                        feedparser = httplib2.email.FeedParser.FeedParser()
                        feedparser.feed(info)
                        info = feedparser.close()
                        feedparser._parse = None
                    except IndexError:
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else: cachekey = None
            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)
            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    entry_disposition = httplib2._entry_disposition(info, headers) 
                    if entry_disposition == "FRESH":
                        if not cached_value:
                            info['status'] = '504'
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)
                    if entry_disposition == "STALE":
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT": pass
                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response 
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in httplib2._get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    httplib2._updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True 

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content 
            else: 
                cc = httplib2._parse_cache_control(headers)
                if cc.has_key('only-if-cached'):
                    info['status'] = '504'
                    response = Response(info)
                    content = ""
                else:
                    (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                if isinstance(e, httplib2.HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e) 
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "408",
                            "content-length": len(content)
                            })
                    response.reason = "Request Timeout"
                else:
                    content = str(e) 
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "400",
                            "content-length": len(content)
                            })
                    response.reason = "Bad Request" 
            else: raise