def process_request(self, request, spider):
    """Attach a proxy to the request unless the URL is exempt; once the
    retry budget is exhausted, fall back to a rate-limited direct
    (local-machine) connection.

    NOTE(review): reconstructed from flattened source — the nesting of the
    max-retry check relative to the 'proxy'-in-meta check should be
    confirmed against the original file.
    """
    # URLs matching any no-proxy pattern are always fetched directly.
    for p in self.no_proxy_patterns:
        if p.search(request.url):
            return
    retries = request.meta.get('retry_times', None)
    # A manually assigned proxy is left untouched on the first attempt.
    if 'proxy' in request.meta:
        if retries is None:
            return
    # When the maximum retry count is reached, access the site directly
    # from the local machine, so a failing proxy always gets one local
    # attempt before the request is given up.
    if retries == self.max_retry_times:
        now = time.time()
        # Rate-limit direct connections from the local address.
        should_sleep = self.local_interval - (now - self.local_last_use_time)
        if should_sleep > 0:
            log.msg('ProxyMiddleware:use proxy fail,local sleep %s' % should_sleep, log.DEBUG)
            time.sleep(should_sleep)
        return
    parsed = urlparse_cached(request)
    scheme = parsed.scheme
    # 'no_proxy' is only supported by http schemes
    if scheme in ('http', 'https') and proxy_bypass(parsed.hostname):
        return
    self._set_proxy(request, scheme)
def get_environ_proxies(netloc):
    """Return a dict of environment proxies."""
    def env_value(key):
        # Environment variables may be spelled in lower or upper case.
        return os.environ.get(key) or os.environ.get(key.upper())

    # First check whether no_proxy is defined. If it is, check whether the
    # netloc we are fetching matches one of its entries.
    raw_no_proxy = env_value('no_proxy')
    if raw_no_proxy:
        hostname = netloc.split(':')[0]
        entries = raw_no_proxy.replace(' ', '').split(',')
        # The netloc may carry a port; try matching both with and without it.
        if any(netloc.endswith(entry) or hostname.endswith(entry)
               for entry in entries):
            return {}
    # Defer to the platform's own bypass rules as well.
    if proxy_bypass(netloc):
        return {}
    # No exclusion applies: use whatever proxies the environment defines.
    return getproxies()
def using_http_proxy(url):
    """
    Return True if the url will use HTTP proxy.
    Returns False otherwise.
    """
    parts = urlparse(url)
    scheme_has_proxy = parts.scheme.lower() in getproxies()
    return scheme_has_proxy and not proxy_bypass(parts.netloc)
def process_request(self, request, spider):
    """Pop a proxy from the per-scheme pool and attach it to the request.

    Requests flagged with meta['no_proxy'] >= 1, and hosts covered by the
    platform's proxy-bypass rules, are fetched directly.
    """
    # No proxy available, or the request opts out (e.g. localhost).
    if request.meta.get('no_proxy', 0) >= 1:
        logging.info('免代理 url=[%s]' % request.url)
        return
    # Pick the proxy pool matching the request scheme; default to http.
    parsed = urlparse_cached(request)
    scheme = parsed.scheme if parsed.scheme in self.proxy_pool else 'http'
    # Host is excluded by the proxy-bypass rules: go direct.
    if proxy_bypass(parsed.hostname):
        logging.info('不能使用代理. parsed.hostname:%s' % parsed.hostname)
        return
    # Refill the pool when it is missing or exhausted for this scheme.
    if scheme not in self.proxy_pool or len(self.proxy_pool.get(scheme)) < 1:
        self.update_proxy_pool()
        if len(self.proxy_pool[scheme]) < 1:
            logging.warning("更新代理库后仍然没有适合[%s]的代理,使用http协议替换" % scheme)
            scheme = 'http'
    # Attach the proxy (and its credentials, if any) to the request.
    creds, proxy_url = self.proxy_pool[scheme].pop()
    logging.debug('取出一个代理. creds=[%s] proxy_url=[%s]' % (creds, proxy_url))
    request.meta['proxy'] = proxy_url
    if creds:
        # BUG FIX: the header name was misspelled 'Porxy-Authorization',
        # so proxy credentials were silently never sent.
        request.headers['Proxy-Authorization'] = creds
    logging.debug('代理设置. url=[%s] proxy_url:%s' % (request.url, proxy_url))
    return
def proxy_open(self, req, proxy, type):
    """Attach *proxy* to *req*, adding Basic credentials when the proxy URL
    embeds user:password; re-dispatch when the proxy scheme differs."""
    orig_type = req.get_type()
    proxy_type, user, password, hostport = _parse_proxy(proxy)
    if proxy_type is None:
        proxy_type = orig_type
    # Respect the host bypass list: these hosts are contacted directly.
    if req.host and proxy_bypass(req.host):
        return None
    if user and password:
        # BUG FIX: the credential format string had been corrupted to
        # '******' (a redaction artifact), which raises TypeError under
        # %-formatting. Basic auth requires the "user:password" form.
        user_pass = '%s:%s' % (unquote(user), unquote(password))
        creds = base64.b64encode(user_pass).strip()
        req.add_header('Proxy-authorization', 'Basic ' + creds)
    hostport = unquote(hostport)
    req.set_proxy(hostport, proxy_type)
    if orig_type == proxy_type or orig_type == 'https':
        # let other handlers take care of it
        return None
    else:
        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        # e.g. if we have a constructor arg proxies like so:
        # {'http': 'ftp://proxy.example.com'}, we may end up turning
        # a request for http://acme.example.com/a into one for
        # ftp://proxy.example.com/a
        return self.parent.open(req, timeout=req.timeout)
def open_http(url, data=None):
    """Use HTTP protocol."""
    import httplib
    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        # Plain URL string: split any user:password@ credentials out of
        # the netloc and talk to the host directly.
        host, selector = urllib.splithost(url)
        if host:
            user_passwd, host = urllib.splituser(host)
            host = urllib.unquote(host)
        realhost = host
    else:
        # (proxy_host, full_url) tuple: the request goes through a proxy.
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = urllib.splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = urllib.splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = urllib.splithost(rest)
            if realhost:
                user_passwd, realhost = urllib.splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if urllib.proxy_bypass(realhost):
                # Target host is on the bypass list: skip the proxy.
                host = realhost
        #print "proxy via http:", host, selector
    if not host:
        raise IOError('http error', 'no host given')
    # Encode proxy and origin-server credentials for Basic auth headers.
    if proxy_passwd:
        import base64
        proxy_auth = base64.b64encode(proxy_passwd).strip()
    else:
        proxy_auth = None
    if user_passwd:
        import base64
        auth = base64.b64encode(user_passwd).strip()
    else:
        auth = None
    c = FakeHTTPConnection(host)
    if data is not None:
        # Presence of a body implies a form-encoded POST.
        c.putrequest('POST', selector)
        c.putheader('Content-Type', 'application/x-www-form-urlencoded')
        c.putheader('Content-Length', '%d' % len(data))
    else:
        c.putrequest('GET', selector)
    if proxy_auth:
        c.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth:
        c.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        c.putheader('Host', realhost)
    # Propagate the default headers (e.g. User-Agent) of urllib's opener.
    for args in urllib.URLopener().addheaders:
        c.putheader(*args)
    c.endheaders()
    return c
def open_http(url, data=None):
    """Use HTTP protocol."""
    import httplib
    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        # Plain URL string: split any user:password@ credentials out of
        # the netloc and talk to the host directly.
        host, selector = urllib.splithost(url)
        if host:
            user_passwd, host = urllib.splituser(host)
            host = urllib.unquote(host)
        realhost = host
    else:
        # (proxy_host, full_url) tuple: the request goes through a proxy.
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = urllib.splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = urllib.splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = urllib.splithost(rest)
            if realhost:
                user_passwd, realhost = urllib.splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if urllib.proxy_bypass(realhost):
                # Target host is on the bypass list: skip the proxy.
                host = realhost
        #print "proxy via http:", host, selector
    if not host:
        raise IOError, ('http error', 'no host given')
    # Encode proxy and origin-server credentials for Basic auth headers.
    if proxy_passwd:
        import base64
        proxy_auth = base64.b64encode(proxy_passwd).strip()
    else:
        proxy_auth = None
    if user_passwd:
        import base64
        auth = base64.b64encode(user_passwd).strip()
    else:
        auth = None
    c = FakeHTTPConnection(host)
    if data is not None:
        # Presence of a body implies a form-encoded POST.
        c.putrequest('POST', selector)
        c.putheader('Content-Type', 'application/x-www-form-urlencoded')
        c.putheader('Content-Length', '%d' % len(data))
    else:
        c.putrequest('GET', selector)
    if proxy_auth:
        c.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth:
        c.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        c.putheader('Host', realhost)
    # Propagate the default headers (e.g. User-Agent) of urllib's opener.
    for args in urllib.URLopener().addheaders:
        c.putheader(*args)
    c.endheaders()
    return c
def ignore_proxy_host(self):
    """Check if self.host is in the $no_proxy ignore list."""
    # The platform's bypass rules take precedence.
    if urllib.proxy_bypass(self.host):
        return True
    no_proxy = os.environ.get("no_proxy")
    if not no_proxy:
        return False
    # Otherwise look for an exact host/port match in $no_proxy.
    for host, port in (parse_host_port(entry) for entry in no_proxy.split(",")):
        if host.lower() == self.host and port == self.port:
            return True
    return False
def process_request(self, request, spider):
    """Assign a configured proxy to the request, honouring bypass rules."""
    # A proxy chosen earlier (e.g. per-request) wins.
    if 'proxy' in request.meta:
        return
    url_parts = urlparse_cached(request)
    # The 'no_proxy' bypass machinery only applies to http(s) schemes.
    bypass = url_parts.scheme in ('http', 'https') and proxy_bypass(url_parts.hostname)
    if not bypass and url_parts.scheme in self.proxies:
        self._set_proxy(request, url_parts.scheme)
def proxy_open(self, req, proxy, type):
    """Route *req* through *proxy*, adding Basic credentials when the proxy
    URL embeds user:password; re-dispatch when the scheme changes."""
    orig_type = req.get_type()
    proxy_type, user, password, hostport = _parse_proxy(proxy)
    if proxy_type is None:
        proxy_type = orig_type
    # Honour the platform/no_proxy bypass list.
    if req.host and proxy_bypass(req.host):
        return
    if user and password:
        # BUG FIX: restore the "user:password" format — the literal had
        # been corrupted to '******', which raises TypeError when
        # %-formatted with two arguments.
        user_pass = '%s:%s' % (unquote(user), unquote(password))
        creds = base64.b64encode(user_pass).strip()
        req.add_header('Proxy-authorization', 'Basic ' + creds)
    hostport = unquote(hostport)
    req.set_proxy(hostport, proxy_type)
    if orig_type == proxy_type or orig_type == 'https':
        # Same scheme (or https tunnelling): let other handlers continue.
        return
    else:
        # Scheme changed (e.g. {'http': 'ftp://proxy'}): restart the open
        # so the right scheme handler processes the proxied request.
        return self.parent.open(req, timeout=req.timeout)
def find_proxy(url):
    """Return the configured proxy for *url* as a "host:port" string,
    or None when the URL should be fetched directly."""
    scheme, netloc, path, pars, query, fragment = urlparse.urlparse(url)
    proxies = urllib.getproxies()
    proxyhost = None
    if scheme in proxies:
        # Skip a leading user:pass@ section of the netloc, if present.
        if '@' in netloc:
            sidx = netloc.find('@') + 1
        else:
            sidx = 0
        # Cut before the port, if any. NOTE(review): breaks for IPv6
        # literals, whose colons would be mistaken for a port separator.
        eidx = netloc.find(':')
        if eidx == -1:
            eidx = len(netloc)
        host = netloc[sidx:eidx]
        # Never proxy loopback or hosts covered by the bypass rules.
        if not (host == '127.0.0.1' or urllib.proxy_bypass(host)):
            proxyurl = proxies[scheme]
            proxyelems = urlparse.urlparse(proxyurl)
            # Element 1 of the parse result is the proxy's "host:port".
            proxyhost = proxyelems[1]
    if DEBUG:
        print >> sys.stderr, 'find_proxy: Got proxies', proxies, 'selected', proxyhost, 'URL was', url
    return proxyhost
def process_request(self, request, spider):
    """Attach the appropriate proxy (and credentials) to *request*."""
    if 'proxy' in request.meta:
        # An explicitly set proxy URL may embed user:pass@ credentials;
        # split them out and move them into a Proxy-Authorization header.
        creds, proxy = self._get_proxy(request.meta['proxy'], 'http')
        if creds:
            request.meta['proxy'] = proxy
            request.headers['Proxy-Authorization'] = 'Basic ' + creds
        return
    url_parts = urlparse_cached(request)
    # 'no_proxy' handling only applies to http(s) schemes.
    if url_parts.scheme in ('http', 'https') and proxy_bypass(url_parts.hostname):
        return
    if url_parts.scheme in self.proxies:
        self._set_proxy(request, url_parts.scheme)
def find_proxy(url):
    """ Returns proxy host as "host:port" string """
    (scheme, netloc, path, pars, query, fragment) = urlparse.urlparse(url)
    proxies = urllib.getproxies()
    proxyhost = None
    if scheme in proxies:
        # Skip a leading user:pass@ section of the netloc, if present.
        if '@' in netloc:
            sidx = netloc.find('@')+1
        else:
            sidx = 0
        # IPVSIX TODO: what if host is IPv6 address
        eidx = netloc.find(':')
        if eidx == -1:
            eidx = len(netloc)
        host = netloc[sidx:eidx]
        # Never proxy loopback or hosts covered by the bypass rules.
        if not (host == "127.0.0.1" or urllib.proxy_bypass(host)):
            proxyurl = proxies[scheme]
            proxyelems = urlparse.urlparse(proxyurl)
            # Element 1 of the parse result is the proxy's "host:port".
            proxyhost = proxyelems[1]
    if DEBUG:
        print >>sys.stderr,"find_proxy: Got proxies",proxies,"selected",proxyhost,"URL was",url
    return proxyhost
def find_proxy(url):
    """ Returns proxy host as "host:port" string """
    (scheme, netloc, path, pars, query, fragment) = urlparse.urlparse(url)
    proxies = urllib.getproxies()
    proxyhost = None
    if scheme in proxies:
        # Skip a leading user:pass@ section of the netloc, if present.
        if '@' in netloc:
            sidx = netloc.find('@') + 1
        else:
            sidx = 0
        # IPVSIX TODO: what if host is IPv6 address
        eidx = netloc.find(':')
        if eidx == -1:
            eidx = len(netloc)
        host = netloc[sidx:eidx]
        # Never proxy loopback or hosts covered by the bypass rules.
        if not (host == "127.0.0.1" or urllib.proxy_bypass(host)):
            proxyurl = proxies[scheme]
            proxyelems = urlparse.urlparse(proxyurl)
            # Element 1 of the parse result is the proxy's "host:port".
            proxyhost = proxyelems[1]
    if DEBUG:
        print >> sys.stderr, "find_proxy: Got proxies", proxies, "selected", proxyhost, "URL was", url
    return proxyhost
def proxy_open(self, req, proxy, type):
    """Point *req* at *proxy*; restart the request if the scheme changes."""
    orig_type = req.get_type()
    proxy_type, user, password, hostport = _parse_proxy(proxy)
    if proxy_type is None:
        proxy_type = orig_type
    # Populate req.host as a side effect before the bypass check.
    req.get_host()
    # Skip the proxy entirely for bypassed hosts.
    if req.host and proxy_bypass(req.host):
        return None
    if user and password:
        # BUG FIX: rebuild the "user:password" credential string — the
        # literal had been corrupted to '******', which raises TypeError
        # under %-formatting.
        user_pass = '%s:%s' % (unquote(user), unquote(password))
        creds = base64.encodestring(user_pass).strip()
        req.add_header('Proxy-authorization', 'Basic ' + creds)
    hostport = unquote(hostport)
    req.set_proxy(hostport, proxy_type)
    if orig_type == proxy_type:
        # let other handlers take care of it
        # XXX this only makes sense if the proxy is before the
        # other handlers
        return None
    else:
        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        return self.parent.open(req)
def _setup_connection(self, protocol, netloc):
    """Takes care of managing proxies if any.

    This is a first attempt to manage proxies. Authentication is not yet
    taken into account. This all stuff is not tested yet.

    Parameters
    ----------
    protocol: str
        http or https
    netloc: str
        url to connect to

    Returns
    -------
    HTTP(S)Session properly set up in case of proxies
    """
    proxies = urllib.getproxies()
    # We process proxy if a proxy is defined for this protocol and the
    # netloc to connect to is not in the bypass list.
    if protocol in proxies and urllib.proxy_bypass(netloc) == 0:
        proxy = proxies[protocol]
        urltype, proxyhost = urllib.splittype(proxy)
        host, selector = urllib.splithost(proxyhost)
        host, port = urllib.splitport(host)
        if protocol == 'https':
            self.connections[protocol + netloc] = client.HTTPSConnection(host, port)
            self.connections[protocol + netloc].set_tunnel(netloc, 443)
        else:
            # BUG FIX: the http branch also constructed HTTPSConnection,
            # which would attempt a TLS handshake with a plain-http proxy.
            self.connections[protocol + netloc] = client.HTTPConnection(host, port)
            self.connections[protocol + netloc].set_tunnel(netloc, 80)
    else:
        # No proxy: connect to the target netloc directly.
        if protocol == 'https':
            self.connections[protocol + netloc] = client.HTTPSConnection(netloc)
        else:
            self.connections[protocol + netloc] = client.HTTPConnection(netloc)
def __init__(
    self,
    url,
    method="GET",
    data=None,
    headers=None,
    headers_only=False,
    user_agent=None,
    follow_location=False,
    force_quiet=True,
):
    """Configure a pycurl handle for *url* with the given method, body,
    headers and proxy settings; the request is executed elsewhere."""
    GObjectWrapper.__init__(self)
    # Response body and headers accumulate into these buffers.
    self.result = StringIO.StringIO()
    self.result_headers = StringIO.StringIO()
    if isinstance(url, unicode):
        self.url = url.encode("utf-8")
    else:
        self.url = url
    self.method = method
    self.data = data
    self.headers = headers
    self.status = None
    # the actual curl request object
    self.curl = pycurl.Curl()
    if logging.root.level == logging.DEBUG and not force_quiet:
        self.curl.setopt(pycurl.VERBOSE, 1)
    self.curl.setopt(pycurl.WRITEFUNCTION, self.result.write)
    self.curl.setopt(pycurl.HEADERFUNCTION, self.result_headers.write)
    # We want to use gzip and deflate if possible:
    self.curl.setopt(pycurl.ENCODING, "")  # use all available encodings
    self.curl.setopt(pycurl.URL, self.url)
    # let's set the HTTP request method
    if method == "GET":
        self.curl.setopt(pycurl.HTTPGET, 1)
    elif method == "POST":
        self.curl.setopt(pycurl.POST, 1)
    elif method == "PUT":
        self.curl.setopt(pycurl.UPLOAD, 1)
    else:
        self.curl.setopt(pycurl.CUSTOMREQUEST, method)
    if data:
        if method == "PUT":
            # PUT uploads stream from a file-like object.
            self.data = StringIO.StringIO(data)
            self.curl.setopt(pycurl.READFUNCTION, self.data.read)
            self.curl.setopt(pycurl.INFILESIZE, len(self.data.getvalue()))
        else:
            self.curl.setopt(pycurl.POSTFIELDS, self.data)
            self.curl.setopt(pycurl.POSTFIELDSIZE, len(self.data))
    if headers:
        self.curl.setopt(pycurl.HTTPHEADER, headers)
    if headers_only:
        self.curl.setopt(pycurl.HEADER, 1)
        self.curl.setopt(pycurl.NOBODY, 1)
    if user_agent:
        self.curl.setopt(pycurl.USERAGENT, user_agent)
    if follow_location:
        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
    if libproxy:
        for proxy in proxy_factory.getProxies(self.url):
            # only use the first one
            self.curl.setopt(pycurl.PROXY, proxy)
            break
    else:
        # Proxy: let's be careful to isolate the protocol to ensure that we
        # support the case where http and https might use different proxies
        split_url = self.url.split("://", 1)
        if len(split_url) > 1:
            # We were able to get a protocol
            protocol, address = split_url
            host, _path = urllib.splithost("//" + address)
            proxies = urllib.getproxies()
            if protocol in proxies and not urllib.proxy_bypass(host):
                self.curl.setopt(pycurl.PROXY, proxies[protocol])
    # self reference required, because CurlMulti will only return
    # Curl handles
    self.curl.request = self
def loadPage(self, url, uri=None, method="GET", params="", additionalParams=""):
    """Perform an HTTPS request (optionally through the configured proxy)
    and return a Struct(status, location, data); local cookies are sent
    with the request and refreshed from the response."""
    if not url:
        logging.error("Request URL undefined")
        tools.exitErr()
    if not url.startswith("http"):
        url = "https://" + url
    urlData = urlparse(url)
    if not uri:
        # BUG FIX: the '%' operator was missing, so this line tried to
        # CALL the format string and raised TypeError.
        url = "%s://%s" % (urlData.scheme, urlData.netloc)
        uri = urlData.path + '?' + urlData.query
    # prepare params, append to uri
    if params:
        params = urlencode(params) + additionalParams
        if method == "GET":
            uri += ('?' if uri.find('?') == -1 else '&') + params
            params = ""
    # insert local cookies in request
    headers = {
        "Cookie": '; '.join([key + '=' + self.cookies[key] for key in self.cookies.keys()])
    }
    if method == "POST":
        headers["Content-type"] = "application/x-www-form-urlencoded"
    # Route through the proxy unless the host is on the bypass list.
    if self._proxy is None or proxy_bypass(urlData.hostname):
        host = urlData.hostname
        port = urlData.port
        real_host = real_port = None
    else:
        host = self._proxy.hostname
        port = self._proxy.port
        real_host = urlData.hostname
        real_port = urlData.port
    logging.debug("Request URL: %s:/%s > %s # %s", url, uri, unquote(params), headers["Cookie"])
    conn = httplib.HTTPSConnection(host, port)
    if real_host is not None:
        # CONNECT tunnel through the proxy to the real host.
        conn.set_tunnel(real_host, real_port, headers=self._proxy_auth)
    if config.DEBUG:
        conn.set_debuglevel(1)
    conn.request(method, url + uri, params, headers)
    response = conn.getresponse()
    data = response.read()
    conn.close()
    logging.debug("Response : %s > %s", response.status, response.getheaders())
    result = tools.Struct(status=response.status,
                          location=response.getheader('location', None),
                          data=data)
    # update local cookies
    sk = Cookie.SimpleCookie(response.getheader("Set-Cookie", ""))
    for key in sk:
        self.cookies[key] = sk[key].value
    # delete cookies whose content is "deleteme"
    # (iterate a snapshot of the keys, since we mutate the dict)
    for key in list(self.cookies.keys()):
        if self.cookies[key] == "deleteme":
            del self.cookies[key]
    return result
def proxied(value):
    """Return True when *value*'s host would be reached through an
    environment-configured proxy."""
    host = urlparse(value).netloc
    has_env_proxies = bool(getproxies_environment())
    return has_env_proxies and not proxy_bypass(host)
def _build_opener(url):
    """Build (and memoize per API URL) a urllib2 opener with auth, cookie
    and proxy handlers for the API endpoint derived from *url*.

    NOTE(review): this chunk appears truncated — the non-sslcertck branch
    and the return statement are not visible here.
    """
    from osc.core import __version__
    global config
    apiurl = urljoin(*parse_apisrv_url(None, url))
    # Cache the last opener: rebuilding it per request would be wasteful.
    if "last_opener" not in _build_opener.__dict__:
        _build_opener.last_opener = (None, None)
    if apiurl == _build_opener.last_opener[0]:
        return _build_opener.last_opener[1]
    # respect no_proxy env variable
    # NOTE(review): proxy_bypass() expects a host name but receives the
    # full apiurl here — confirm against the urllib documentation.
    if urllib.proxy_bypass(apiurl):
        # initialize with empty dict
        proxyhandler = urllib2.ProxyHandler({})
    else:
        # read proxies from env
        proxyhandler = urllib2.ProxyHandler()
    # workaround for http://bugs.python.org/issue9639
    authhandler_class = urllib2.HTTPBasicAuthHandler
    if (
        sys.version_info >= (2, 6, 6)
        and sys.version_info < (2, 7, 1)
        and not "reset_retry_count" in dir(urllib2.HTTPBasicAuthHandler)
    ):
        print >> sys.stderr, "warning: your urllib2 version seems to be broken. " "Using a workaround for http://bugs.python.org/issue9639"

        class OscHTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            def http_error_401(self, *args):
                response = urllib2.HTTPBasicAuthHandler.http_error_401(self, *args)
                self.retried = 0
                return response

            def http_error_404(self, *args):
                self.retried = 0
                return None

        authhandler_class = OscHTTPBasicAuthHandler
    elif sys.version_info >= (2, 6, 6) and sys.version_info < (2, 7, 1):

        class OscHTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            def http_error_404(self, *args):
                self.reset_retry_count()
                return None

        authhandler_class = OscHTTPBasicAuthHandler
    elif sys.version_info >= (2, 6, 5) and sys.version_info < (2, 6, 6):
        # workaround for broken urllib2 in python 2.6.5: wrong credentials
        # lead to an infinite recursion
        class OscHTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            def retry_http_basic_auth(self, host, req, realm):
                # don't retry if auth failed
                if req.get_header(self.auth_header, None) is not None:
                    return None
                return urllib2.HTTPBasicAuthHandler.retry_http_basic_auth(self, host, req, realm)

        authhandler_class = OscHTTPBasicAuthHandler
    options = config["api_host_options"][apiurl]
    # with None as first argument, it will always use this username/password
    # combination for urls for which arg2 (apisrv) is a super-url
    authhandler = authhandler_class(urllib2.HTTPPasswordMgrWithDefaultRealm())
    authhandler.add_password(None, apiurl, options["user"], options["pass"])
    if options["sslcertck"]:
        # Certificate-checked HTTPS via M2Crypto.
        try:
            import oscssl
            from M2Crypto import m2urllib2
        except ImportError, e:
            print e
            raise NoSecureSSLError(
                "M2Crypto is needed to access %s in a secure way.\nPlease install python-m2crypto." % apiurl
            )
        cafile = options.get("cafile", None)
        capath = options.get("capath", None)
        if not cafile and not capath:
            # Fall back to well-known system CA locations.
            for i in ["/etc/pki/tls/cert.pem", "/etc/ssl/certs"]:
                if os.path.isfile(i):
                    cafile = i
                    break
                elif os.path.isdir(i):
                    capath = i
                    break
        ctx = oscssl.mySSLContext()
        if ctx.load_verify_locations(capath=capath, cafile=cafile) != 1:
            raise Exception("No CA certificates found")
        opener = m2urllib2.build_opener(
            ctx,
            oscssl.myHTTPSHandler(ssl_context=ctx, appname="osc"),
            urllib2.HTTPCookieProcessor(cookiejar),
            authhandler,
            proxyhandler,
        )
def loadPage(self, url, uri=None, method="GET", params="", additionalParams=""):
    """Perform an HTTPS request (optionally through the configured proxy)
    and return a Struct(status, location, data); local cookies are sent
    with the request and refreshed from the response."""
    if not url:
        logging.error("Request URL undefined")
        tools.exitErr()
    if not url.startswith("http"):
        url = "https://" + url
    urlData = urlparse(url)
    if not uri:
        # BUG FIX: the '%' operator was missing, so this line tried to
        # CALL the format string and raised TypeError.
        url = "%s://%s" % (urlData.scheme, urlData.netloc)
        uri = urlData.path + '?' + urlData.query
    # prepare params, append to uri
    if params:
        params = urlencode(params) + additionalParams
        if method == "GET":
            uri += ('?' if uri.find('?') == -1 else '&') + params
            params = ""
    # insert local cookies in request
    headers = {
        "Cookie": '; '.join(
            [key + '=' + self.cookies[key] for key in self.cookies.keys()])
    }
    if method == "POST":
        headers["Content-type"] = "application/x-www-form-urlencoded"
    # Route through the proxy unless the host is on the bypass list.
    if self._proxy is None or proxy_bypass(urlData.hostname):
        host = urlData.hostname
        port = urlData.port
        real_host = real_port = None
    else:
        host = self._proxy.hostname
        port = self._proxy.port
        real_host = urlData.hostname
        real_port = urlData.port
    logging.debug("Request URL: %s:/%s > %s # %s", url, uri, unquote(params), headers["Cookie"])
    conn = httplib.HTTPSConnection(host, port)
    if real_host is not None:
        # CONNECT tunnel through the proxy to the real host.
        conn.set_tunnel(real_host, real_port, headers=self._proxy_auth)
    if config.DEBUG:
        conn.set_debuglevel(1)
    conn.request(method, url + uri, params, headers)
    response = conn.getresponse()
    data = response.read()
    conn.close()
    logging.debug("Response : %s > %s", response.status, response.getheaders())
    result = tools.Struct(status=response.status,
                          location=response.getheader('location', None),
                          data=data)
    # update local cookies
    sk = Cookie.SimpleCookie(response.getheader("Set-Cookie", ""))
    for key in sk:
        self.cookies[key] = sk[key].value
    # delete cookies whose content is "deleteme"
    # (iterate a snapshot of the keys, since we mutate the dict)
    for key in list(self.cookies.keys()):
        if self.cookies[key] == "deleteme":
            del self.cookies[key]
    return result
def open_http(url, data=None):
    """Use HTTP protocol."""
    import httplib
    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        # Plain URL string: split any user:password@ credentials out of
        # the netloc and talk to the host directly.
        host, selector = urllib.splithost(url)
        if host:
            user_passwd, host = urllib.splituser(host)
            host = urllib.unquote(host)
        realhost = host
    else:
        # (proxy_host, full_url) tuple: the request goes through a proxy.
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = urllib.splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = urllib.splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != "http":
            realhost = None
        else:
            realhost, rest = urllib.splithost(rest)
            if realhost:
                user_passwd, realhost = urllib.splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if urllib.proxy_bypass(realhost):
                # Target host is on the bypass list: skip the proxy.
                host = realhost
        # print "proxy via http:", host, selector
    if not host:
        raise IOError, ("http error", "no host given")
    # Encode proxy and origin-server credentials for Basic auth headers.
    if proxy_passwd:
        import base64
        proxy_auth = base64.b64encode(proxy_passwd).strip()
    else:
        proxy_auth = None
    if user_passwd:
        import base64
        auth = base64.b64encode(user_passwd).strip()
    else:
        auth = None
    h = HTTP(host)
    if data is not None:
        # Presence of a body implies a form-encoded POST.
        h.putrequest("POST", selector)
        h.putheader("Content-Type", "application/x-www-form-urlencoded")
        h.putheader("Content-Length", "%d" % len(data))
    else:
        h.putrequest("GET", selector)
    if proxy_auth:
        h.putheader("Proxy-Authorization", "Basic %s" % proxy_auth)
    if auth:
        h.putheader("Authorization", "Basic %s" % auth)
    if realhost:
        h.putheader("Host", realhost)
    # Propagate the default headers (e.g. User-Agent) of urllib's opener.
    for args in urllib.URLopener().addheaders:
        h.putheader(*args)
    h.endheaders()
    return h
def _build_opener(url):
    """Build (and memoize per API URL) a urllib2 opener with auth, cookie
    and proxy handlers for the API endpoint derived from *url*.

    NOTE(review): this chunk appears truncated — the non-sslcertck branch
    and the return statement are not visible here.
    """
    from osc.core import __version__
    global config
    apiurl = urljoin(*parse_apisrv_url(None, url))
    # Cache the last opener: rebuilding it per request would be wasteful.
    if 'last_opener' not in _build_opener.__dict__:
        _build_opener.last_opener = (None, None)
    if apiurl == _build_opener.last_opener[0]:
        return _build_opener.last_opener[1]
    # respect no_proxy env variable
    # NOTE(review): proxy_bypass() expects a host name but receives the
    # full apiurl here — confirm against the urllib documentation.
    if urllib.proxy_bypass(apiurl):
        # initialize with empty dict
        proxyhandler = urllib2.ProxyHandler({})
    else:
        # read proxies from env
        proxyhandler = urllib2.ProxyHandler()
    # workaround for http://bugs.python.org/issue9639
    authhandler_class = urllib2.HTTPBasicAuthHandler
    if sys.version_info >= (2, 6, 6) and sys.version_info < (2, 7, 1) \
        and not 'reset_retry_count' in dir(urllib2.HTTPBasicAuthHandler):
        print >>sys.stderr, 'warning: your urllib2 version seems to be broken. ' \
            'Using a workaround for http://bugs.python.org/issue9639'

        class OscHTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            def http_error_401(self, *args):
                response = urllib2.HTTPBasicAuthHandler.http_error_401(self, *args)
                self.retried = 0
                return response

            def http_error_404(self, *args):
                self.retried = 0
                return None

        authhandler_class = OscHTTPBasicAuthHandler
    elif sys.version_info >= (2, 6, 6) and sys.version_info < (2, 7, 1):

        class OscHTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            def http_error_404(self, *args):
                self.reset_retry_count()
                return None

        authhandler_class = OscHTTPBasicAuthHandler
    elif sys.version_info >= (2, 6, 5) and sys.version_info < (2, 6, 6):
        # workaround for broken urllib2 in python 2.6.5: wrong credentials
        # lead to an infinite recursion
        class OscHTTPBasicAuthHandler(urllib2.HTTPBasicAuthHandler):
            def retry_http_basic_auth(self, host, req, realm):
                # don't retry if auth failed
                if req.get_header(self.auth_header, None) is not None:
                    return None
                return urllib2.HTTPBasicAuthHandler.retry_http_basic_auth(self, host, req, realm)

        authhandler_class = OscHTTPBasicAuthHandler
    options = config['api_host_options'][apiurl]
    # with None as first argument, it will always use this username/password
    # combination for urls for which arg2 (apisrv) is a super-url
    authhandler = authhandler_class( \
        urllib2.HTTPPasswordMgrWithDefaultRealm())
    authhandler.add_password(None, apiurl, options['user'], options['pass'])
    if options['sslcertck']:
        # Certificate-checked HTTPS via M2Crypto.
        try:
            import oscssl
            from M2Crypto import m2urllib2
        except ImportError, e:
            print e
            raise NoSecureSSLError('M2Crypto is needed to access %s in a secure way.\nPlease install python-m2crypto.' % apiurl)
        cafile = options.get('cafile', None)
        capath = options.get('capath', None)
        if not cafile and not capath:
            # Fall back to well-known system CA locations.
            for i in ['/etc/pki/tls/cert.pem', '/etc/ssl/certs']:
                if os.path.isfile(i):
                    cafile = i
                    break
                elif os.path.isdir(i):
                    capath = i
                    break
        ctx = oscssl.mySSLContext()
        if ctx.load_verify_locations(capath=capath, cafile=cafile) != 1:
            raise Exception('No CA certificates found')
        opener = m2urllib2.build_opener(ctx, oscssl.myHTTPSHandler(ssl_context=ctx, appname='osc'), urllib2.HTTPCookieProcessor(cookiejar), authhandler, proxyhandler)
def __init__(self, url, method='GET', data=None, headers=None,
             headers_only=False, user_agent=None, follow_location=False,
             force_quiet=True):
    """Configure a pycurl handle for *url* with the given method, body,
    headers and proxy settings; the request is executed elsewhere."""
    GObjectWrapper.__init__(self)
    # Response body and headers accumulate into these buffers.
    self.result = StringIO.StringIO()
    self.result_headers = StringIO.StringIO()
    if isinstance(url, unicode):
        self.url = url.encode("utf-8")
    else:
        self.url = url
    self.method = method
    self.data = data
    self.headers = headers
    self.status = None
    # the actual curl request object
    self.curl = pycurl.Curl()
    if (logging.root.level == logging.DEBUG and not force_quiet):
        self.curl.setopt(pycurl.VERBOSE, 1)
    self.curl.setopt(pycurl.WRITEFUNCTION, self.result.write)
    self.curl.setopt(pycurl.HEADERFUNCTION, self.result_headers.write)
    # We want to use gzip and deflate if possible:
    self.curl.setopt(pycurl.ENCODING, "")  # use all available encodings
    self.curl.setopt(pycurl.URL, self.url)
    # let's set the HTTP request method
    if method == 'GET':
        self.curl.setopt(pycurl.HTTPGET, 1)
    elif method == 'POST':
        self.curl.setopt(pycurl.POST, 1)
    elif method == 'PUT':
        self.curl.setopt(pycurl.UPLOAD, 1)
    else:
        self.curl.setopt(pycurl.CUSTOMREQUEST, method)
    if data:
        if method == "PUT":
            # PUT uploads stream from a file-like object.
            self.data = StringIO.StringIO(data)
            self.curl.setopt(pycurl.READFUNCTION, self.data.read)
            self.curl.setopt(pycurl.INFILESIZE, len(self.data.getvalue()))
        else:
            self.curl.setopt(pycurl.POSTFIELDS, self.data)
            self.curl.setopt(pycurl.POSTFIELDSIZE, len(self.data))
    if headers:
        self.curl.setopt(pycurl.HTTPHEADER, headers)
    if headers_only:
        self.curl.setopt(pycurl.HEADER, 1)
        self.curl.setopt(pycurl.NOBODY, 1)
    if user_agent:
        self.curl.setopt(pycurl.USERAGENT, user_agent)
    if follow_location:
        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
    if libproxy:
        for proxy in proxy_factory.getProxies(self.url):
            # if we connect to localhost (localtm) with proxy specifically
            # set to direct://, libcurl connects fine, but then asks
            # GET http://localhost:55555/unit/en/af/whatever
            # instead of
            # GET /unit/en/af/whatever
            # and it doesn't work. We have to set it specifically to ""
            # though, otherwise it seems to fall back to environment
            # variables.
            if proxy == "direct://":
                proxy = ""
            self.curl.setopt(pycurl.PROXY, proxy)
            #only use the first one
            break
    else:
        # Proxy: let's be careful to isolate the protocol to ensure that we
        # support the case where http and https might use different proxies
        split_url = self.url.split('://', 1)
        if len(split_url) > 1:
            #We were able to get a protocol
            protocol, address = split_url
            host, _path = urllib.splithost('//' + address)
            proxies = urllib.getproxies()
            if protocol in proxies and not urllib.proxy_bypass(host):
                self.curl.setopt(pycurl.PROXY, proxies[protocol])
    # self reference required, because CurlMulti will only return
    # Curl handles
    self.curl.request = self