def get_cache_timeout_response(self,
                               url,
                               http_timeout_in_seconds=20,
                               max_cache_age_seconds=(1) * (24 * 60 * 60),  # (number of days) * (number of seconds in a day)
                               header_addons=None):
    """Fetch ``url`` through an on-disk httplib2 cache and record statistics.

    The cache stored under ``self.CACHE_DIR`` is consulted directly first.
    On a hit the stored entry is split at the first blank line and returned
    without any network traffic.  On a miss the request is issued with a
    ``cache-control: max-age=...`` request header, the same directive is
    forced onto the response, and the cache entry is (re)written.

    Args:
        url: address to fetch; also used verbatim as the cache key.
        http_timeout_in_seconds: timeout handed to ``httplib2.Http``.
        max_cache_age_seconds: value used for the ``max-age`` directive.
        header_addons: optional extra request headers; they are merged over
            the generated ``cache-control`` header.

    Returns:
        A ``(response, content)`` pair.  NOTE: on a direct cache hit
        ``response`` is the raw header text (a string) of the cached entry;
        otherwise it is whatever ``httplib2.Http.request`` returned.

    Side effects:
        Updates the counters in ``self.status``
        (``count_got_response_from_cache``, ``count_missed_cache``,
        ``count_cache_miss_details``, ``count_cache_miss_response`` and
        ``count_api_requests``).
    """
    http_cached = httplib2.Http(self.CACHE_DIR, timeout=http_timeout_in_seconds)
    cache_control = "max-age=" + str(max_cache_age_seconds)
    header_dict = {'cache-control': cache_control}
    # ``None`` replaces the former shared ``{}`` default argument.
    if header_addons:
        header_dict.update(header_addons)

    cache_read = http_cached.cache.get(url)
    if cache_read:
        (response, content) = cache_read.split("\r\n\r\n", 1)
        # ``response`` is a plain string here and has no ``fromcache``
        # attribute, so remember the hit explicitly instead of probing it.
        served_from_cache = True
    else:
        (response, content) = http_cached.request(url, headers=header_dict)
        response['cache-control'] = cache_control
        httplib2._updateCache(header_dict, response, content, http_cached.cache, url)
        # httplib2 may still have answered from its own cache lookup.
        served_from_cache = bool(getattr(response, "fromcache", False))

    if served_from_cache:
        self.status["count_got_response_from_cache"] += 1
    else:
        self.status["count_missed_cache"] += 1
        self.status["count_cache_miss_details"] = str(self.status["count_cache_miss_details"]) + "; " + url
        self.status["count_cache_miss_response"] = str(response)
        self.status["count_api_requests"] += 1

    # A disabled (``if False:``) urllib2 fallback used to live here; it could
    # never execute and has been dropped.
    return (response, content)
def _follow_redirect(self, uri, method, body, headers, response, content, max_redirects):
    """Internal function to follow a redirect received by L{request}.

    Args:
        uri: the URI that produced the redirect response.
        method: HTTP method of the redirected request.
        body: request body, passed on unchanged to the follow-up request.
        headers: request headers; the conditional headers
            ``if-none-match`` and ``if-modified-since`` are removed in place.
        response: the redirect response (mapping of headers with a
            ``status`` attribute); a relative ``location`` is rewritten in
            place to an absolute one.
        content: body of the redirect response.
        max_redirects: remaining redirect budget; the follow-up request is
            issued with ``max_redirects - 1``.

    Returns:
        Whatever ``self.request`` returns for the redirect target.

    Raises:
        httplib2.RedirectMissingLocation: no ``location`` header and the
            status is not 300.
        httplib2.RedirectLimit: no ``location`` header is available to
            follow (only reachable for a 300 response without one).
    """
    (scheme, authority, absolute_uri, defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
    cachekey = defrag_uri if self.cache else None

    # Pick out the location header and basically start from the beginning
    # remembering first to strip the ETag header and decrement our 'depth'
    if 'location' not in response and response.status != 300:
        raise httplib2.RedirectMissingLocation(
            "Redirected but the response is missing a Location: header.",
            response, content)

    # Fix-up relative redirects (which violate an RFC 2616 MUST)
    if 'location' in response:
        location = response['location']
        (scheme, authority, path, query, fragment) = httplib2.parse_uri(location)
        if authority is None:
            response['location'] = httplib2.urlparse.urljoin(uri, location)
            logging.debug('Relative redirect: changed [%s] to [%s]',
                          location, response['location'])

    # Permanent redirects of safe methods are remembered in the cache.
    if response.status == 301 and method in ["GET", "HEAD"]:
        response['-x-permanent-redirect-url'] = response['location']
        if 'content-location' not in response:
            response['content-location'] = absolute_uri
        httplib2._updateCache(headers, response, content, self.cache, cachekey)

    # Validators belong to the old URI and must not be sent to the new one.
    headers.pop('if-none-match', None)
    headers.pop('if-modified-since', None)

    if 'location' in response:
        location = response['location']
        # A 303 turns any non-GET/HEAD request into a GET.
        if response.status == 303 and method not in ["GET", "HEAD"]:
            redirect_method = "GET"
        else:
            redirect_method = method
        return self.request(location, redirect_method, body=body,
                            headers=headers,
                            max_redirects=max_redirects - 1)
    else:
        raise httplib2.RedirectLimit(
            "Redirected more times than redirection_limit allows.",
            response, content)
def _follow_redirect(self, uri, method, body, headers, response, content,
                     max_redirects):
    """Re-issue a redirected request against the URI in its Location header.

    Helper for L{request}.  A relative ``location`` header is rewritten in
    place to an absolute URI, a 301 answer to GET/HEAD is written to the
    cache, and the conditional request headers are removed from ``headers``
    before the follow-up request is made with ``max_redirects - 1``.

    Raises ``httplib2.RedirectMissingLocation`` when a non-300 response has
    no ``location`` header, and ``httplib2.RedirectLimit`` when there is no
    ``location`` header left to follow.
    """
    safe_methods = ("GET", "HEAD")

    _, _, absolute_uri, defrag_uri = httplib2.urlnorm(httplib2.iri2uri(uri))
    cachekey = defrag_uri if self.cache else None

    # Only a 300 (Multiple Choices) answer may omit the Location header.
    if "location" not in response and response.status != 300:
        raise httplib2.RedirectMissingLocation(
            "Redirected but the response is missing a Location: header.",
            response, content)

    # Relative redirects violate an RFC 2616 MUST; make them absolute.
    if "location" in response:
        target = response['location']
        _, target_authority, _, _, _ = httplib2.parse_uri(target)
        if target_authority is None:
            response['location'] = httplib2.urlparse.urljoin(uri, target)
            pywikibot.debug(
                u"Relative redirect: changed [%s] to [%s]"
                % (target, response['location']),
                _logger)

    # Remember permanent redirects of safe methods in the cache.
    if response.status == 301 and method in safe_methods:
        response['-x-permanent-redirect-url'] = response['location']
        if "content-location" not in response:
            response['content-location'] = absolute_uri
        httplib2._updateCache(headers, response, content, self.cache,
                              cachekey)

    # Drop the validators before starting over at the new location.
    for conditional_header in ('if-none-match', 'if-modified-since'):
        headers.pop(conditional_header, None)

    if "location" not in response:
        raise httplib2.RedirectLimit(
            "Redirected more times than redirection_limit allows.",
            response, content)

    # A 303 downgrades any non-GET/HEAD request to GET.
    if response.status == 303 and method not in safe_methods:
        redirect_method = "GET"
    else:
        redirect_method = method
    return self.request(response['location'], redirect_method, body=body,
                        headers=headers, max_redirects=max_redirects - 1)
def OverrideRequest(self, conn, host, absolute_uri, request_uri, method,
                    body, headers, redirections, cachekey):
    """Do the actual request using the connection object.

    Also follow one level of redirects if necessary.

    Args:
        conn: connection object handed to ``self._conn_request``; its class
            is reused as ``connection_type`` for a redirected request.
        host: host used to select matching authorizations.
        absolute_uri: absolute request URI; used to resolve relative
            redirects and as the default ``content-location``.
        request_uri: URI sent on the connection.
        method: HTTP method.
        body: request body (dropped when a 302/303 is followed).
        headers: request headers; modified in place (conditional headers,
            and ``authorization`` unless
            ``self.forward_authorization_headers`` is set, are removed
            before a redirect is followed).
        redirections: remaining redirect budget.
        cachekey: cache key passed to ``httplib2._updateCache``.

    Returns:
        A ``(response, content)`` pair.

    Raises:
        httplib2.RedirectMissingLocation: a non-300 redirect response has no
            ``location`` header.
        httplib2.RedirectLimit: a redirect should be followed but
            ``redirections`` is exhausted.
    """
    safe_methods = ["GET", "HEAD"]

    # Pick the matching authorization that sorts first by depth.
    auths = [(auth.depth(request_uri), auth)
             for auth in self.authorizations
             if auth.inscope(host, request_uri)]
    auth = sorted(auths)[0][1] if auths else None
    if auth:
        auth.request(method, request_uri, headers, body)

    (response, content) = self._conn_request(conn, request_uri, method, body,
                                             headers)

    if auth:
        # The authorization may ask for a retry (e.g. stale credentials).
        if auth.response(response, body):
            auth.request(method, request_uri, headers, body)
            (response, content) = self._conn_request(conn, request_uri,
                                                     method, body, headers)
            response._stale_digest = 1

    if response.status == 401:
        # Try each authorization derived from the challenge until one works.
        for authorization in self._auth_from_challenge(
                host, request_uri, headers, response, content):
            authorization.request(method, request_uri, headers, body)
            (response, content) = self._conn_request(conn, request_uri,
                                                     method, body, headers)
            if response.status != 401:
                self.authorizations.append(authorization)
                authorization.response(response, body)
                break

    if (self.follow_all_redirects or (method in safe_methods)
            or response.status == 303):
        if (self.follow_redirects
                and response.status in [300, 301, 302, 303, 307]):
            # Pick out the location header and basically start from the
            # beginning remembering first to strip the ETag header and
            # decrement our 'depth'
            if redirections:
                if 'location' not in response and response.status != 300:
                    raise httplib2.RedirectMissingLocation(
                        "Redirected but the response is missing a Location: header.",
                        response, content)
                # Fix-up relative redirects (which violate an RFC 2616 MUST)
                if 'location' in response:
                    location = response['location']
                    (scheme, authority, path, query,
                     fragment) = parse_uri(location)
                    if authority is None:
                        response['location'] = urlparse.urljoin(absolute_uri,
                                                                location)
                if response.status == 301 and method in safe_methods:
                    response['-x-permanent-redirect-url'] = response['location']
                    if 'content-location' not in response:
                        response['content-location'] = absolute_uri
                    httplib2._updateCache(headers, response, content,
                                          self.cache, cachekey)
                # Validators belong to the old URI only.
                headers.pop('if-none-match', None)
                headers.pop('if-modified-since', None)
                if ('authorization' in headers and
                        not self.forward_authorization_headers):
                    del headers['authorization']
                if 'location' in response:
                    location = response['location']
                    old_response = copy.deepcopy(response)
                    if 'content-location' not in old_response:
                        old_response['content-location'] = absolute_uri
                    redirect_method = method
                    if response.status in [302, 303]:
                        redirect_method = "GET"
                        body = None
                    (response, content) = self.request(
                        location, redirect_method, body=body, headers=headers,
                        redirections=redirections - 1,
                        connection_type=conn.__class__)
                    response.previous = old_response
            else:
                raise httplib2.RedirectLimit(
                    "Redirected more times than redirection_limit allows.",
                    response, content)
        elif response.status in [200, 203] and method in safe_methods:
            # Don't cache 206's since we aren't going to handle byte range
            # requests
            if 'content-location' not in response:
                response['content-location'] = absolute_uri
            httplib2._updateCache(headers, response, content, self.cache,
                                  cachekey)

    return (response, content)
def OverrideRequest(self, conn, host, absolute_uri, request_uri, method,
                    body, headers, redirections, cachekey):
    """Do the actual request using the connection object.

    Also follow one level of redirects if necessary.

    Args:
        conn: connection object passed to ``self._conn_request``; its class
            is reused as ``connection_type`` when a redirect is followed.
        host: host used to find in-scope authorizations.
        absolute_uri: absolute URI of the request; base for relative
            redirects and default ``content-location`` value.
        request_uri: URI sent on the connection.
        method: HTTP method.
        body: request body; discarded when a 302/303 redirect is followed.
        headers: request headers, modified in place: ``if-none-match`` and
            ``if-modified-since`` are removed before following a redirect,
            as is ``authorization`` unless
            ``self.forward_authorization_headers`` is true.
        redirections: number of redirects still allowed.
        cachekey: key handed to ``httplib2._updateCache``.

    Returns:
        The ``(response, content)`` pair of the final request.

    Raises:
        httplib2.RedirectMissingLocation: redirect response (other than 300)
            without a ``location`` header.
        httplib2.RedirectLimit: a redirect would be followed but
            ``redirections`` is zero.
    """
    safe_methods = ["GET", "HEAD"]
    redirect_statuses = [300, 301, 302, 303, 307]

    # Choose the in-scope authorization that sorts first by depth.
    auths = [(auth.depth(request_uri), auth)
             for auth in self.authorizations
             if auth.inscope(host, request_uri)]
    auth = sorted(auths)[0][1] if auths else None
    if auth:
        auth.request(method, request_uri, headers, body)

    (response, content) = self._conn_request(conn, request_uri, method, body,
                                             headers)

    # Give the authorization a chance to request one retry.
    if auth and auth.response(response, body):
        auth.request(method, request_uri, headers, body)
        (response, content) = self._conn_request(conn, request_uri, method,
                                                 body, headers)
        response._stale_digest = 1

    if response.status == 401:
        # Walk the authorizations offered by the challenge; keep the first
        # one that gets past the 401.
        for authorization in self._auth_from_challenge(
                host, request_uri, headers, response, content):
            authorization.request(method, request_uri, headers, body)
            (response, content) = self._conn_request(conn, request_uri,
                                                     method, body, headers)
            if response.status != 401:
                self.authorizations.append(authorization)
                authorization.response(response, body)
                break

    if (self.follow_all_redirects or (method in safe_methods)
            or response.status == 303):
        if self.follow_redirects and response.status in redirect_statuses:
            # Pick out the location header and basically start from the
            # beginning remembering first to strip the ETag header and
            # decrement our 'depth'
            if redirections:
                if 'location' not in response and response.status != 300:
                    raise httplib2.RedirectMissingLocation(
                        "Redirected but the response is missing a Location: header.",
                        response, content)
                # Fix-up relative redirects (which violate an RFC 2616 MUST)
                if 'location' in response:
                    location = response['location']
                    (scheme, authority, path, query,
                     fragment) = parse_uri(location)
                    if authority is None:
                        response['location'] = urlparse.urljoin(
                            absolute_uri, location)
                if response.status == 301 and method in safe_methods:
                    response['-x-permanent-redirect-url'] = response[
                        'location']
                    if 'content-location' not in response:
                        response['content-location'] = absolute_uri
                    httplib2._updateCache(headers, response, content,
                                          self.cache, cachekey)
                # Conditional headers only make sense for the original URI.
                headers.pop('if-none-match', None)
                headers.pop('if-modified-since', None)
                if ('authorization' in headers and
                        not self.forward_authorization_headers):
                    del headers['authorization']
                if 'location' in response:
                    location = response['location']
                    old_response = copy.deepcopy(response)
                    if 'content-location' not in old_response:
                        old_response['content-location'] = absolute_uri
                    redirect_method = method
                    if response.status in [302, 303]:
                        redirect_method = "GET"
                        body = None
                    (response, content) = self.request(
                        location,
                        redirect_method,
                        body=body,
                        headers=headers,
                        redirections=redirections - 1,
                        connection_type=conn.__class__)
                    response.previous = old_response
            else:
                raise httplib2.RedirectLimit(
                    "Redirected more times than redirection_limit allows.",
                    response, content)
        elif response.status in [200, 203] and method in safe_methods:
            # Don't cache 206's since we aren't going to handle byte range
            # requests
            if 'content-location' not in response:
                response['content-location'] = absolute_uri
            httplib2._updateCache(headers, response, content, self.cache,
                                  cachekey)

    return (response, content)
def request(self, uri, method="GET", body=None, headers=None, redirections=httplib2.DEFAULT_MAX_REDIRECTS, connection_type=None): """request handler with thread safety hacked in""" try: if headers is None: headers = {} else: headers = httplib2._normalize_headers(headers) if not headers.has_key('user-agent'): headers['user-agent'] = "Python-httplib2/%s" % httplib2.__version__ uri = httplib2.iri2uri(uri) (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(uri) domain_port = authority.split(":")[0:2] if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http': scheme = 'https' authority = domain_port[0] conn_key = scheme+":"+authority def get_conn(conn_key): if conn_key in self.connections: conn = self.connections[conn_key] if type(conn) is list: for c in conn: if not getattr(c, 'busy', True): return c else: return c if type(conn) is list: return None conn = get_conn(conn_key) if conn is None: if not connection_type: connection_type = (scheme == 'https') and httplib2.HTTPSConnectionWithTimeout or httplib2.HTTPConnectionWithTimeout certs = list(self.certificates.iter(authority)) if scheme == 'https' and certs: conn = connection_type(authority, key_file=certs[0][0], cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info) self.connections.setdefault(conn_key, []).append(conn) else: conn = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info) self.connections.setdefault(conn_key, []).append(conn) conn.set_debuglevel(httplib2.debuglevel) conn.busy = True if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers: headers['accept-encoding'] = 'deflate, gzip' info = httplib2.email.Message.Message() cached_value = None if self.cache: cachekey = defrag_uri cached_value = self.cache.get(cachekey) if cached_value: try: info, content = cached_value.split('\r\n\r\n', 1) feedparser = httplib2.email.FeedParser.FeedParser() feedparser.feed(info) info = feedparser.close() 
feedparser._parse = None except IndexError: self.cache.delete(cachekey) cachekey = None cached_value = None else: cachekey = None if method not in ["GET", "HEAD"] and self.cache and cachekey: # RFC 2616 Section 13.10 self.cache.delete(cachekey) if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers: if info.has_key('-x-permanent-redirect-url'): (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1) response.previous = Response(info) response.previous.fromcache = True else: entry_disposition = httplib2._entry_disposition(info, headers) if entry_disposition == "FRESH": if not cached_value: info['status'] = '504' content = "" response = Response(info) if cached_value: response.fromcache = True return (response, content) if entry_disposition == "STALE": if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers: headers['if-none-match'] = info['etag'] if info.has_key('last-modified') and not 'last-modified' in headers: headers['if-modified-since'] = info['last-modified'] elif entry_disposition == "TRANSPARENT": pass (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) if response.status == 304 and method == "GET": # Rewrite the cache entry with the new end-to-end headers # Take all headers that are in response # and overwrite their values in info. # unless they are hop-by-hop, or are listed in the connection header. 
for key in httplib2._get_end2end_headers(response): info[key] = response[key] merged_response = Response(info) if hasattr(response, "_stale_digest"): merged_response._stale_digest = response._stale_digest httplib2._updateCache(headers, merged_response, content, self.cache, cachekey) response = merged_response response.status = 200 response.fromcache = True elif response.status == 200: content = new_content else: self.cache.delete(cachekey) content = new_content else: cc = httplib2._parse_cache_control(headers) if cc.has_key('only-if-cached'): info['status'] = '504' response = Response(info) content = "" else: (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) except Exception, e: if self.force_exception_to_status_code: if isinstance(e, httplib2.HttpLib2ErrorWithResponse): response = e.response content = e.content response.status = 500 response.reason = str(e) elif isinstance(e, socket.timeout): content = "Request Timeout" response = Response( { "content-type": "text/plain", "status": "408", "content-length": len(content) }) response.reason = "Request Timeout" else: content = str(e) response = Response( { "content-type": "text/plain", "status": "400", "content-length": len(content) }) response.reason = "Bad Request" else: raise