Esempio n. 1
0
 def get_cache_timeout_response(self,
                                url,
                                http_timeout_in_seconds=20,
                                max_cache_age_seconds=(1) * (24 * 60 * 60),  # (number of days) * (number of seconds in a day)
                                header_addons=None):
     """Fetch ``url``, serving it from the httplib2 cache in ``self.CACHE_DIR`` when possible.

     The cache is first probed directly under the key ``url``.  On a hit the
     stored entry is split at the first blank line and returned as-is; no
     network request is made and no ``self.status`` counter is touched.  On a
     miss the request is sent with a ``cache-control: max-age=...`` header,
     the same directive is forced onto the response, the result is written
     to the cache under ``url``, and the ``self.status`` counters are updated.

     Args:
         url: Address to fetch; also used verbatim as the cache key.
         http_timeout_in_seconds: Timeout handed to ``httplib2.Http``.
         max_cache_age_seconds: Value used for the ``max-age`` directive.
         header_addons: Optional extra request headers merged over the
             default ``cache-control`` header.  ``None`` means no extras.

     Returns:
         A ``(response, content)`` pair.  NOTE(review): on a direct cache hit
         ``response`` is the raw header text (a string), whereas on a miss it
         is whatever ``httplib2.Http.request`` returns -- callers must cope
         with both shapes.
     """
     http_cached = httplib2.Http(self.CACHE_DIR, timeout=http_timeout_in_seconds)
     max_age_directive = "max-age=" + str(max_cache_age_seconds)
     header_dict = {'cache-control': max_age_directive}
     # A None default avoids sharing one mutable dict between calls.
     if header_addons is not None:
         header_dict.update(header_addons)

     cache_read = http_cached.cache.get(url)
     if cache_read:
         # A stored entry is the header text, a blank line, then the body.
         (response, content) = cache_read.split("\r\n\r\n", 1)
         return (response, content)

     (response, content) = http_cached.request(url, headers=header_dict)
     # Force our own freshness lifetime onto the response before storing it.
     response['cache-control'] = max_age_directive
     httplib2._updateCache(header_dict, response, content, http_cached.cache, url)
     if response.fromcache:
         self.status["count_got_response_from_cache"] += 1
     else:
         self.status["count_missed_cache"] += 1
         self.status["count_cache_miss_details"] = str(self.status["count_cache_miss_details"]) + "; " + url
         self.status["count_cache_miss_response"] = str(response)
         self.status["count_api_requests"] += 1

     return (response, content)
Esempio n. 2
0
    def _follow_redirect(self, uri, method, body, headers, response, content, max_redirects):
        """Internal function to follow a redirect received by L{request}.

        Resolves a relative ``Location`` header against ``uri``, records a
        301 answer to a GET/HEAD in the cache as a permanent redirect, drops
        the conditional request headers, and re-issues the request at the
        new location with ``max_redirects - 1``.

        @param uri: the URI that produced the redirect response
        @param method: HTTP method of the redirected request
        @param body: request body, passed through to the follow-up request
        @param headers: request headers dict; mutated in place
        @param response: the redirect response; mutated in place
        @param content: body of the redirect response
        @param max_redirects: remaining redirect budget
        @return: whatever C{self.request} returns for the redirect target
        @raise httplib2.RedirectMissingLocation: no Location header and the
            status is not 300
        @raise httplib2.RedirectLimit: no Location header on a 300 response
            (the message text talks about the redirect limit, but this is
            the only way this branch is reached here)
        """
        (scheme, authority, absolute_uri, defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
        cachekey = defrag_uri if self.cache else None

        # Pick out the location header and basically start from the beginning
        # remembering first to strip the ETag header and decrement our 'depth'
        if 'location' not in response and response.status != 300:
            raise httplib2.RedirectMissingLocation("Redirected but the response is missing a Location: header.", response, content)
        # Fix-up relative redirects (which violate an RFC 2616 MUST)
        if 'location' in response:
            location = response['location']
            (scheme, authority, path, query, fragment) = httplib2.parse_uri(location)
            if authority is None:
                response['location'] = httplib2.urlparse.urljoin(uri, location)
                logging.debug('Relative redirect: changed [%s] to [%s]', location, response['location'])
        if response.status == 301 and method in ["GET", "HEAD"]:
            # Remember the permanent redirect so later requests can skip the hop.
            response['-x-permanent-redirect-url'] = response['location']
            if 'content-location' not in response:
                response['content-location'] = absolute_uri
            httplib2._updateCache(headers, response, content, self.cache, cachekey)

        # Validators belong to the original URI, not to the redirect target.
        headers.pop('if-none-match', None)
        headers.pop('if-modified-since', None)

        if 'location' not in response:
            raise httplib2.RedirectLimit("Redirected more times than redirection_limit allows.", response, content)

        location = response['location']
        # A 303 answer to a non-GET/HEAD request is retried as a GET.
        if response.status == 303 and method not in ["GET", "HEAD"]:
            redirect_method = "GET"
        else:
            redirect_method = method
        return self.request(location, redirect_method, body=body, headers=headers, max_redirects=max_redirects - 1)
Esempio n. 3
0
    def _follow_redirect(self, uri, method, body, headers, response, content,
                         max_redirects):
        """Re-issue a request at the target of a redirect response.

        A relative Location header is made absolute against ``uri``; a 301
        answer to GET/HEAD is stored in the cache as a permanent redirect;
        the conditional headers are removed from ``headers``; the follow-up
        request is sent through ``self.request`` with one fewer redirect
        allowed.  Raises ``httplib2.RedirectMissingLocation`` when a
        non-300 response lacks a Location header, and
        ``httplib2.RedirectLimit`` when a 300 response lacks one.
        """
        normalized = httplib2.urlnorm(httplib2.iri2uri(uri))
        (_scheme, _authority, absolute_uri, defrag_uri) = normalized
        cachekey = defrag_uri if self.cache else None

        has_location = "location" in response
        if not (has_location or response.status == 300):
            raise httplib2.RedirectMissingLocation(
                "Redirected but the response is missing a Location: header.",
                response, content)

        if has_location:
            # Relative redirects violate an RFC 2616 MUST; repair them here.
            target = response['location']
            parsed_target = httplib2.parse_uri(target)
            (_scheme, target_authority, _path, _query,
             _fragment) = parsed_target
            if target_authority is None:
                response['location'] = httplib2.urlparse.urljoin(uri, target)
                pywikibot.debug(
                    u"Relative redirect: changed [%s] to [%s]" %
                    (target, response['location']), _logger)

        if response.status == 301 and method in ["GET", "HEAD"]:
            response['-x-permanent-redirect-url'] = response['location']
            if "content-location" not in response:
                response['content-location'] = absolute_uri
            httplib2._updateCache(headers, response, content, self.cache,
                                  cachekey)

        # The validators were issued for the old URI, so drop them.
        for conditional_header in ('if-none-match', 'if-modified-since'):
            headers.pop(conditional_header, None)

        if "location" not in response:
            raise httplib2.RedirectLimit(
                "Redirected more times than redirection_limit allows.",
                response, content)

        # A 303 reply to anything other than GET/HEAD is retried as GET.
        if response.status == 303 and method not in ["GET", "HEAD"]:
            redirect_method = "GET"
        else:
            redirect_method = method
        return self.request(response['location'],
                            redirect_method,
                            body=body,
                            headers=headers,
                            max_redirects=max_redirects - 1)
Esempio n. 4
0
  def OverrideRequest(self, conn, host, absolute_uri, request_uri, method,
                      body, headers, redirections, cachekey):
    """Do the actual request using the connection object.

    Applies the in-scope stored authorization (if any), retries once when
    that authorization asks for it, answers a 401 by trying each
    authorization built from the challenge, and then either follows a
    redirect or caches a 200/203 answer to a GET/HEAD.

    Redirects are followed by calling self.request with redirections - 1,
    so more than one hop may be followed until the budget is used up.

    Args:
      conn: Connection object handed to self._conn_request.
      host: Host used to select authorizations and challenges.
      absolute_uri: Absolute URI of the request; used to resolve relative
        Location headers and as the default content-location.
      request_uri: URI sent on the request line.
      method: HTTP method.
      body: Request body; dropped when a 302/303 is retried as GET.
      headers: Request headers dict; mutated in place.
      redirections: Remaining redirect budget.
      cachekey: Key under which responses are written to self.cache.

    Returns:
      A (response, content) tuple for the final response.

    Raises:
      httplib2.RedirectMissingLocation: A redirect other than 300 carried no
        Location header.
      httplib2.RedirectLimit: A redirect arrived with no budget left.
    """

    auths = [(auth.depth(request_uri), auth) for auth in self.authorizations
             if auth.inscope(host, request_uri)]
    auth = sorted(auths)[0][1] if auths else None
    if auth:
      auth.request(method, request_uri, headers, body)

    (response, content) = self._conn_request(conn, request_uri, method, body,
                                             headers)

    if auth and auth.response(response, body):
      # The authorization asked for a retry with refreshed credentials.
      auth.request(method, request_uri, headers, body)
      (response, content) = self._conn_request(conn, request_uri, method,
                                               body, headers)
      response._stale_digest = 1

    if response.status == 401:
      for authorization in self._auth_from_challenge(
          host, request_uri, headers, response, content):
        authorization.request(method, request_uri, headers, body)
        (response, content) = self._conn_request(conn, request_uri, method,
                                                 body, headers)
        if response.status != 401:
          # Keep the authorization that worked for later requests.
          self.authorizations.append(authorization)
          authorization.response(response, body)
          break

    if (self.follow_all_redirects or (method in ["GET", "HEAD"])
        or response.status == 303):
      if self.follow_redirects and response.status in [300, 301, 302,
                                                       303, 307]:
        # Pick out the location header and basically start from the beginning
        # remembering first to strip the ETag header and decrement our 'depth'
        if not redirections:
          raise httplib2.RedirectLimit(
              "Redirected more times than redirection_limit allows.",
              response, content)
        if 'location' not in response and response.status != 300:
          raise httplib2.RedirectMissingLocation(
              "Redirected but the response is missing a Location: header.",
              response, content)
        # Fix-up relative redirects (which violate an RFC 2616 MUST)
        if 'location' in response:
          location = response['location']
          (scheme, authority, path, query, fragment) = parse_uri(location)
          if authority is None:
            response['location'] = urlparse.urljoin(absolute_uri, location)
        if response.status == 301 and method in ["GET", "HEAD"]:
          response['-x-permanent-redirect-url'] = response['location']
          if 'content-location' not in response:
            response['content-location'] = absolute_uri
          httplib2._updateCache(headers, response, content, self.cache,
                                cachekey)
        # Validators belong to the original URI, not the redirect target.
        headers.pop('if-none-match', None)
        headers.pop('if-modified-since', None)
        if ('authorization' in headers and
            not self.forward_authorization_headers):
          del headers['authorization']
        if 'location' in response:
          location = response['location']
          old_response = copy.deepcopy(response)
          if 'content-location' not in old_response:
            old_response['content-location'] = absolute_uri
          redirect_method = method
          if response.status in [302, 303]:
            # 302/303 are retried as a body-less GET.
            redirect_method = "GET"
            body = None
          (response, content) = self.request(
              location, redirect_method, body=body, headers=headers,
              redirections=redirections-1,
              connection_type=conn.__class__)
          response.previous = old_response
      elif response.status in [200, 203] and method in ["GET", "HEAD"]:
        # Don't cache 206's since we aren't going to handle byte range
        # requests
        if 'content-location' not in response:
          response['content-location'] = absolute_uri
        httplib2._updateCache(headers, response, content, self.cache,
                              cachekey)

    return (response, content)
Esempio n. 5
0
    def OverrideRequest(self, conn, host, absolute_uri, request_uri, method,
                        body, headers, redirections, cachekey):
        """Do the actual request using the connection object.

        Applies the in-scope stored authorization (if any), retries once
        when that authorization asks for it, answers a 401 by trying each
        authorization built from the challenge, and then either follows a
        redirect or caches a 200/203 answer to a GET/HEAD.

        Redirects are followed by calling self.request with
        redirections - 1, so more than one hop may be followed until the
        budget is used up.

        Args:
          conn: Connection object handed to self._conn_request.
          host: Host used to select authorizations and challenges.
          absolute_uri: Absolute URI of the request; used to resolve
            relative Location headers and as the default content-location.
          request_uri: URI sent on the request line.
          method: HTTP method.
          body: Request body; dropped when a 302/303 is retried as GET.
          headers: Request headers dict; mutated in place.
          redirections: Remaining redirect budget.
          cachekey: Key under which responses are written to self.cache.

        Returns:
          A (response, content) tuple for the final response.

        Raises:
          httplib2.RedirectMissingLocation: A redirect other than 300
            carried no Location header.
          httplib2.RedirectLimit: A redirect arrived with no budget left.
        """

        auths = [(auth.depth(request_uri), auth)
                 for auth in self.authorizations
                 if auth.inscope(host, request_uri)]
        auth = sorted(auths)[0][1] if auths else None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method,
                                                 body, headers)

        if auth and auth.response(response, body):
            # The authorization asked for a retry with refreshed credentials.
            auth.request(method, request_uri, headers, body)
            (response, content) = self._conn_request(conn, request_uri,
                                                     method, body, headers)
            response._stale_digest = 1

        if response.status == 401:
            for authorization in self._auth_from_challenge(
                    host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(
                    conn, request_uri, method, body, headers)
                if response.status != 401:
                    # Keep the authorization that worked for later requests.
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if (self.follow_all_redirects or (method in ["GET", "HEAD"])
                or response.status == 303):
            if self.follow_redirects and response.status in [
                    300, 301, 302, 303, 307
            ]:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if not redirections:
                    raise httplib2.RedirectLimit(
                        "Redirected more times than redirection_limit allows.",
                        response, content)
                if 'location' not in response and response.status != 300:
                    raise httplib2.RedirectMissingLocation(
                        "Redirected but the response is missing a Location: header.",
                        response, content)
                # Fix-up relative redirects (which violate an RFC 2616 MUST)
                if 'location' in response:
                    location = response['location']
                    (scheme, authority, path, query,
                     fragment) = parse_uri(location)
                    if authority is None:
                        response['location'] = urlparse.urljoin(
                            absolute_uri, location)
                if response.status == 301 and method in ["GET", "HEAD"]:
                    response['-x-permanent-redirect-url'] = response[
                        'location']
                    if 'content-location' not in response:
                        response['content-location'] = absolute_uri
                    httplib2._updateCache(headers, response, content,
                                          self.cache, cachekey)
                # Validators belong to the original URI, not the target.
                headers.pop('if-none-match', None)
                headers.pop('if-modified-since', None)
                if ('authorization' in headers
                        and not self.forward_authorization_headers):
                    del headers['authorization']
                if 'location' in response:
                    location = response['location']
                    old_response = copy.deepcopy(response)
                    if 'content-location' not in old_response:
                        old_response['content-location'] = absolute_uri
                    redirect_method = method
                    if response.status in [302, 303]:
                        # 302/303 are retried as a body-less GET.
                        redirect_method = "GET"
                        body = None
                    (response, content) = self.request(
                        location,
                        redirect_method,
                        body=body,
                        headers=headers,
                        redirections=redirections - 1,
                        connection_type=conn.__class__)
                    response.previous = old_response
            elif response.status in [200, 203] and method in ["GET", "HEAD"]:
                # Don't cache 206's since we aren't going to handle byte range
                # requests
                if 'content-location' not in response:
                    response['content-location'] = absolute_uri
                httplib2._updateCache(headers, response, content, self.cache,
                                      cachekey)

        return (response, content)
Esempio n. 6
0
    def request(self, uri, method="GET", body=None, headers=None, redirections=httplib2.DEFAULT_MAX_REDIRECTS, connection_type=None):
        """request handler with thread safety hacked in"""
        try:
            if headers is None:
                headers = {}
            else:
                headers = httplib2._normalize_headers(headers)
            if not headers.has_key('user-agent'):
                headers['user-agent'] = "Python-httplib2/%s" % httplib2.__version__
            uri = httplib2.iri2uri(uri)
            (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(uri)
            domain_port = authority.split(":")[0:2]
            if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
                scheme = 'https'
                authority = domain_port[0]
            conn_key = scheme+":"+authority
            def get_conn(conn_key):
                """Return a reusable connection stored under conn_key, or None.

                When the stored value is a list, the first entry whose
                ``busy`` attribute is false is returned; an entry without a
                ``busy`` attribute counts as busy.  When the stored value is
                a single object rather than a list, that object is returned.
                None is returned when nothing is stored under conn_key or
                every listed connection is busy.
                """
                if conn_key not in self.connections:
                    return None
                pooled = self.connections[conn_key]
                if not isinstance(pooled, list):
                    # Hand back the stored connection itself; the loop
                    # variable of the list branch is not bound on this path.
                    return pooled
                for candidate in pooled:
                    if not getattr(candidate, 'busy', True):
                        return candidate
                return None
            conn = get_conn(conn_key)
            if conn is None:
                if not connection_type:
                    connection_type = (scheme == 'https') and httplib2.HTTPSConnectionWithTimeout or httplib2.HTTPConnectionWithTimeout
                certs = list(self.certificates.iter(authority))
                if scheme == 'https' and certs:
                    conn = connection_type(authority, key_file=certs[0][0],
                        cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
                    self.connections.setdefault(conn_key, []).append(conn)
                else:
                    conn = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
                    self.connections.setdefault(conn_key, []).append(conn) 
                conn.set_debuglevel(httplib2.debuglevel)
            conn.busy = True
            if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers:
                headers['accept-encoding'] = 'deflate, gzip'
            info = httplib2.email.Message.Message()
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    try:
                        info, content = cached_value.split('\r\n\r\n', 1)
                        feedparser = httplib2.email.FeedParser.FeedParser()
                        feedparser.feed(info)
                        info = feedparser.close()
                        feedparser._parse = None
                    except IndexError:
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None
            else: cachekey = None
            if method not in ["GET", "HEAD"] and self.cache and cachekey:
                # RFC 2616 Section 13.10
                self.cache.delete(cachekey)
            if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
                if info.has_key('-x-permanent-redirect-url'):
                    (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    entry_disposition = httplib2._entry_disposition(info, headers) 
                    if entry_disposition == "FRESH":
                        if not cached_value:
                            info['status'] = '504'
                            content = ""
                        response = Response(info)
                        if cached_value:
                            response.fromcache = True
                        return (response, content)
                    if entry_disposition == "STALE":
                        if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                            headers['if-none-match'] = info['etag']
                        if info.has_key('last-modified') and not 'last-modified' in headers:
                            headers['if-modified-since'] = info['last-modified']
                    elif entry_disposition == "TRANSPARENT": pass
                    (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
                if response.status == 304 and method == "GET":
                    # Rewrite the cache entry with the new end-to-end headers
                    # Take all headers that are in response 
                    # and overwrite their values in info.
                    # unless they are hop-by-hop, or are listed in the connection header.

                    for key in httplib2._get_end2end_headers(response):
                        info[key] = response[key]
                    merged_response = Response(info)
                    if hasattr(response, "_stale_digest"):
                        merged_response._stale_digest = response._stale_digest
                    httplib2._updateCache(headers, merged_response, content, self.cache, cachekey)
                    response = merged_response
                    response.status = 200
                    response.fromcache = True 

                elif response.status == 200:
                    content = new_content
                else:
                    self.cache.delete(cachekey)
                    content = new_content 
            else: 
                cc = httplib2._parse_cache_control(headers)
                if cc.has_key('only-if-cached'):
                    info['status'] = '504'
                    response = Response(info)
                    content = ""
                else:
                    (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
        except Exception, e:
            if self.force_exception_to_status_code:
                if isinstance(e, httplib2.HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e) 
                elif isinstance(e, socket.timeout):
                    content = "Request Timeout"
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "408",
                            "content-length": len(content)
                            })
                    response.reason = "Request Timeout"
                else:
                    content = str(e) 
                    response = Response( {
                            "content-type": "text/plain",
                            "status": "400",
                            "content-length": len(content)
                            })
                    response.reason = "Bad Request" 
            else: raise