def resetProxies(self, httpProxyTuple):
    """Rebuild the URL opener and its proxy/authentication handlers.

    When ``httpProxyTuple`` is a 5-item tuple or list it is unpacked as
    ``(useOsProxy, urlAddr, urlPort, user, password)``.  NTLM is attempted
    only when a user is supplied and the OS proxy is not in use, because the
    NTLM handler needs a password manager seeded with credentials.

    Sets ``self.hasNTLM``, ``self.proxy_handler``,
    ``self.proxy_auth_handler``, ``self.http_auth_handler`` and
    ``self.opener``; ``self.ntlm_auth_handler`` is set only when an NTLM
    handler module was found.
    """
    # for ntlm user and password are required
    self.hasNTLM = False
    if isinstance(httpProxyTuple, (tuple, list)) and len(httpProxyTuple) == 5:
        useOsProxy, _urlAddr, _urlPort, user, password = httpProxyTuple
        _proxyDirFmt = proxyDirFmt(httpProxyTuple)
        ntlmModule = None
        # only try ntlm if user and password are provided because passman is needed
        if user and not useOsProxy:
            for pluginXbrlMethod in pluginClassMethods("Proxy.HTTPNtlmAuthHandler"):
                candidate = pluginXbrlMethod()
                # Keep the last non-None result: a later plugin returning
                # None must not discard a handler module found earlier.
                if candidate is not None:
                    ntlmModule = candidate
                    self.hasNTLM = True
            if not self.hasNTLM:
                # try for python site-packages ntlm
                try:
                    from ntlm import HTTPNtlmAuthHandler as ntlmModule
                    self.hasNTLM = True
                except ImportError:
                    pass
        if self.hasNTLM:
            pwrdmgr = proxyhandlers.HTTPPasswordMgrWithDefaultRealm()
            pwrdmgr.add_password(None, _proxyDirFmt["http"], user, password)
            self.proxy_handler = proxyhandlers.ProxyHandler({})
            self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler(pwrdmgr)
            self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler(pwrdmgr)
            self.ntlm_auth_handler = ntlmModule.HTTPNtlmAuthHandler(pwrdmgr)
            self.opener = proxyhandlers.build_opener(
                self.proxy_handler,
                self.ntlm_auth_handler,
                self.proxy_auth_handler,
                self.http_auth_handler,
            )
    if not self.hasNTLM:
        # No NTLM support (or no usable credentials): plain proxy handlers.
        self.proxy_handler = proxyhandlers.ProxyHandler(proxyDirFmt(httpProxyTuple))
        self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler()
        self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler()
        self.opener = proxyhandlers.build_opener(
            self.proxy_handler,
            self.proxy_auth_handler,
            self.http_auth_handler,
        )
def resetProxies(self, httpProxyTuple):
    """Rebuild the URL opener and its proxy/authentication handlers.

    When ``httpProxyTuple`` is a 5-item tuple or list it is unpacked as
    ``(useOsProxy, urlAddr, urlPort, user, password)``.  NTLM is attempted
    only when a user is supplied and the OS proxy is not in use, because the
    NTLM handler needs a password manager seeded with credentials.

    Sets ``self.hasNTLM``, ``self._httpProxyTuple``, ``self.proxy_handler``,
    ``self.proxy_auth_handler``, ``self.http_auth_handler`` and
    ``self.opener`` (with User-Agent and Accept-Encoding default headers);
    ``self.ntlm_auth_handler`` is set only when an NTLM handler module was
    found.  When ``ssl`` is available and ``self.noCertificateCheck`` is
    true, an HTTPS handler that skips certificate verification is appended.
    """
    # for ntlm user and password are required
    self.hasNTLM = False
    # save for resetting in noCertificateCheck setter
    self._httpProxyTuple = httpProxyTuple
    if isinstance(httpProxyTuple, (tuple, list)) and len(httpProxyTuple) == 5:
        useOsProxy, _urlAddr, _urlPort, user, password = httpProxyTuple
        _proxyDirFmt = proxyDirFmt(httpProxyTuple)
        ntlmModule = None
        # only try ntlm if user and password are provided because passman is needed
        if user and not useOsProxy:
            for pluginXbrlMethod in pluginClassMethods("Proxy.HTTPAuthenticate"):
                pluginXbrlMethod(self.cntlr)
            for pluginXbrlMethod in pluginClassMethods("Proxy.HTTPNtlmAuthHandler"):
                candidate = pluginXbrlMethod()
                # Keep the last non-None result: a later plugin returning
                # None must not discard a handler module found earlier.
                if candidate is not None:
                    ntlmModule = candidate
                    self.hasNTLM = True
            if not self.hasNTLM:
                # try for python site-packages ntlm
                try:
                    from ntlm import HTTPNtlmAuthHandler as ntlmModule
                    self.hasNTLM = True
                except ImportError:
                    pass
        if self.hasNTLM:
            pwrdmgr = proxyhandlers.HTTPPasswordMgrWithDefaultRealm()
            pwrdmgr.add_password(None, _proxyDirFmt["http"], user, password)
            self.proxy_handler = proxyhandlers.ProxyHandler({})
            self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler(pwrdmgr)
            self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler(pwrdmgr)
            self.ntlm_auth_handler = ntlmModule.HTTPNtlmAuthHandler(pwrdmgr)
            proxyHandlers = [
                self.proxy_handler,
                self.ntlm_auth_handler,
                self.proxy_auth_handler,
                self.http_auth_handler,
            ]
    if not self.hasNTLM:
        # No NTLM support (or no usable credentials): plain proxy handlers.
        self.proxy_handler = proxyhandlers.ProxyHandler(proxyDirFmt(httpProxyTuple))
        self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler()
        self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler()
        proxyHandlers = [
            self.proxy_handler,
            self.proxy_auth_handler,
            self.http_auth_handler,
        ]
    if ssl and self.noCertificateCheck:
        # this is required in some Akamai environments, such as sec.gov
        context = ssl.create_default_context()
        # check_hostname must be disabled before verify_mode can be CERT_NONE
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE
        proxyHandlers.append(proxyhandlers.HTTPSHandler(context=context))
    self.opener = proxyhandlers.build_opener(*proxyHandlers)
    self.opener.addheaders = [
        ('User-Agent', self.httpUserAgent),
        ('Accept-Encoding', 'gzip, deflate'),
    ]
def request(self, url, params=None, decode=True):
    """Fetch *url* and return the response body.

    The request goes through the configured proxy when ``proxy['enable']``
    is truthy and ``self.site_name`` occurs in *url*.  For such site URLs
    ``self.cookies`` is merged into *params*; a non-empty *params* is sent
    URL-encoded as the request body.

    Returns the body decoded as UTF-8 when *decode* is true, otherwise the
    raw bytes.  Any exception is logged and ``None`` is returned.
    """
    try:
        if proxy['enable'] and self.site_name in url:
            opener = request.build_opener(
                request.ProxyBasicAuthHandler(),
                request.ProxyHandler(proxy['proxy_urls']))
            logger.info('proxy used for "%s"', url)
        else:
            opener = request.build_opener()
        # use cookies only for lostfilm site urls
        if self.site_name in url:
            if not params:
                params = self.cookies
            else:
                # Merge into a copy so the caller's dict is not mutated.
                merged = dict(params)
                merged.update(self.cookies)
                params = merged
        data = parse.urlencode(params).encode('utf-8') if params else None
        # Close the response deterministically once the body is read.
        with opener.open(url, data) as response:
            result = response.read()
        return result.decode('utf-8') if decode else result
    except Exception as e:
        logger.error('%s url="%s" params="%s"', e, url, params)
def use_proxy(url):
    """Open *url* through a fixed HTTP proxy and return the response object."""
    # No proxy credentials are registered; calling
    # add_password('realm', 'host', 'username', 'password') on the auth
    # handler would be the place to supply them.
    handlers = (
        request.ProxyHandler({'http': '122.96.59.103:82/'}),
        request.ProxyBasicAuthHandler(),
    )
    return request.build_opener(*handlers).open(url)
def money_proxy_use():
    """Demonstrate two ways of sending a request through a paid proxy.

    Approach 1 embeds the credentials in the proxy URL.  Approach 2 keeps
    them in a password manager consulted by ``ProxyBasicAuthHandler``.  The
    body of the second response is printed.
    """
    # Approach 1: credentials embedded in the proxy address.
    # NOTE(review): this proxy string looks mangled by e-mail obfuscation;
    # confirm the intended "user:password@host:port" value.
    money_proxy = {"http": "username:[email protected]:8080"}
    # Build the proxy handler, create an opener from it and send the request.
    proxy_handler = request.ProxyHandler(money_proxy)
    opener = request.build_opener(proxy_handler)
    opener.open("http://www.baidu.com")

    # Approach 2: credentials supplied through a password manager.
    user_name = "abcname"
    pwd = "123456"
    proxy_money = "158.63.66.77:8080"
    # Create the password manager and register the user name and password
    # for the proxy address.
    password_manager = request.HTTPPasswordMgrWithPriorAuth()
    password_manager.add_password(None, proxy_money, user_name, pwd)
    # Handler that answers the proxy's authentication challenge.
    handle_auth_proxy = request.ProxyBasicAuthHandler(password_manager)
    # The auth handler alone does not route traffic: the proxy itself must be
    # registered through a ProxyHandler for the credentials to be used.
    opener_auth = request.build_opener(
        request.ProxyHandler({"http": proxy_money}),
        handle_auth_proxy,
    )
    # Send the request with the authenticated opener (not the first one).
    response = opener_auth.open("http://www.baidu.com")
    print(response.read())
def proxy():
    """Open a login page through an example proxy with basic proxy auth."""
    auth = request.ProxyBasicAuthHandler()
    auth.add_password('realm', 'host', 'username', 'password')
    via_proxy = request.ProxyHandler({'http': 'http://www.example.com:3128/'})
    proxied_opener = request.build_opener(via_proxy, auth)
    # The response is only opened and closed; its content is not used.
    with proxied_opener.open('http://www.example.com/login.html') as f:
        pass
def get_url_Proxy():
    """Fetch the cnblogs home page through a proxy and print it as UTF-8 text."""
    get_url = 'https://www.cnblogs.com/'
    proxy_address = '58.64.57.12:3128'
    # The target URL is https, so the proxy must be mapped for that scheme
    # too; with only an 'http' entry the request would bypass the proxy.
    proxy_handler = request.ProxyHandler({
        'http': proxy_address,
        'https': proxy_address,
    })
    proxy_auth_handler = request.ProxyBasicAuthHandler()
    # Placeholder credentials for the proxy's authentication challenge.
    proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
    opener = request.build_opener(proxy_handler, proxy_auth_handler)
    with opener.open(get_url) as f:
        print(f.read().decode('utf-8'))
def testProxy():
    """Request a page through a proxy and print status, headers and body."""
    auth_handler = request.ProxyBasicAuthHandler()
    auth_handler.add_password('realm', 'host', 'username', 'password')
    proxied_opener = request.build_opener(
        request.ProxyHandler({'http': 'http://www.baidu.com/'}),
        auth_handler,
    )
    with proxied_opener.open('http://www.baidu.com/') as f:
        print('Status:', f.status, f.reason)
        # Each header is a (name, value) pair.
        for header in f.getheaders():
            print('%s: %s' % header)
        print('Data:', f.read())
def installopener():
    """Install a global urllib2 opener that uses the configured proxies.

    Reads ``proxies_dict`` and ``proxypwdmgr`` from the enclosing scope and
    registers basic and digest proxy authentication.
    """
    # digest auth may not work with all proxies
    # http://bugs.python.org/issue16095
    # could add windows/ntlm authentication here
    handler_chain = [
        urllib2.ProxyHandler(proxies_dict),
        urllib2.ProxyBasicAuthHandler(proxypwdmgr),
        urllib2.ProxyDigestAuthHandler(proxypwdmgr),
        urllib2.HTTPHandler,
    ]
    urllib2.install_opener(urllib2.build_opener(*handler_chain))
def create_proxy_handler2():
    """Fetch a page through an authenticated proxy and print the raw body."""
    username = '******'
    password = '******'
    proxy_host = '123.158.63.130:8080'
    password_manager = request.HTTPPasswordMgrWithDefaultRealm()
    # Realm None is the catch-all entry of the default-realm manager; an
    # empty-string realm would only match a challenge whose realm is ''.
    password_manager.add_password(None, uri=proxy_host, user=username,
                                  passwd=password)
    handle_auth_proxy = request.ProxyBasicAuthHandler(password_manager)
    # The auth handler only answers the proxy's challenge; the proxy itself
    # has to be registered with a ProxyHandler for the request to use it.
    opener_auth = request.build_opener(
        request.ProxyHandler({'http': proxy_host}),
        handle_auth_proxy,
    )
    response = opener_auth.open('http://www.baidu.com')
    print(response.read())
def get_content(spider_data):
    """Download ``spider_data['url']`` and return the body as decoded text.

    ``spider_data`` is read for the keys ``proxy_ip``, ``user_agent``,
    ``referer``, ``host``, ``request_type`` and ``url``.  Unless
    ``proxy_ip`` equals ``'0.0.0.0'`` a proxy opener is installed globally
    before the request.  The body is decoded with the encoding detected by
    chardet, falling back to UTF-8 when none is detected.
    """
    if spider_data['proxy_ip'] != '0.0.0.0':
        proxy_handler = request.ProxyHandler({'http': spider_data['proxy_ip']})
        proxy_auth_handler = request.ProxyBasicAuthHandler()
        opener = request.build_opener(proxy_handler, proxy_auth_handler)
        # Affects every later request.urlopen call in the process.
        request.install_opener(opener)
    forged_header = {
        'User-Agent': spider_data['user_agent'],
        'Referer': spider_data['referer'],
        'Host': spider_data['host'],
        # NOTE(review): sends the request type as a header name with the URL
        # as its value -- confirm this is intended.
        spider_data['request_type']: spider_data['url'],
    }
    req = request.Request(spider_data['url'], headers=forged_header)
    # Close the connection as soon as the body has been read.
    with request.urlopen(req, timeout=8) as html:
        content = html.read()
    # chardet reports None when it cannot guess; decode(None) would raise.
    encoding = chardet.detect(content)['encoding'] or 'utf-8'
    return content.decode(encoding)
def pwd_demo():
    """Fetch a page through a private (authenticated) proxy and print it."""
    # Account and password authorized for the private proxy
    user = '******'
    pwd = '1234'
    # Private proxy address
    proxy_server = '119.129.99.29:1231'
    # Password manager that stores the user name and password
    pwdmgr = request.HTTPPasswordMgrWithDefaultRealm()
    # Register the account for the proxy address
    pwdmgr.add_password(None, proxy_server, user=user, passwd=pwd)
    # Handler that answers the proxy's basic-auth challenge from the manager
    proxy_auth_handler = request.ProxyBasicAuthHandler(pwdmgr)
    # The auth handler does not route traffic by itself: register the proxy
    # with a ProxyHandler so the request actually goes through it.
    proxy_handler = request.ProxyHandler({'http': proxy_server})
    # Build a custom opener from both handlers
    opener = request.build_opener(proxy_handler, proxy_auth_handler)
    # Build the request, send it and print the response
    req = request.Request("http://www.baidu.com")
    response = opener.open(req)
    print(response.read().decode('utf-8'))
def resetProxies(self, httpProxyTuple):
    """Rebuild the opener from ``httpProxyTuple``, adding NTLM when importable.

    Sets ``self.hasNTLM`` according to whether the ``ntlm`` package can be
    imported, then stores the proxy, proxy-auth and HTTP-auth handlers (plus
    the NTLM handler when available) and the opener built from them.
    """
    try:
        from ntlm import HTTPNtlmAuthHandler
    except ImportError:
        self.hasNTLM = False
    else:
        self.hasNTLM = True

    self.proxy_handler = proxyhandlers.ProxyHandler(proxyDirFmt(httpProxyTuple))
    self.proxy_auth_handler = proxyhandlers.ProxyBasicAuthHandler()
    self.http_auth_handler = proxyhandlers.HTTPBasicAuthHandler()

    chain = [self.proxy_handler, self.proxy_auth_handler, self.http_auth_handler]
    if self.hasNTLM:
        self.ntlm_auth_handler = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler()
        # NTLM sits right after the proxy handler, ahead of the basic handlers.
        chain.insert(1, self.ntlm_auth_handler)
    self.opener = proxyhandlers.build_opener(*chain)
def use_proxy3():
    """Case 3: authenticated proxy via a password manager; print 100 bytes."""
    from urllib import request

    local_proxy = "http://127.0.0.1:1080"
    # case 3: authenticated proxy 2
    password = request.HTTPPasswordMgrWithDefaultRealm()
    proxy_auth = request.ProxyBasicAuthHandler(password)
    password.add_password(realm=None, uri=local_proxy,
                          user='******', passwd='password')
    proxy_support = request.ProxyHandler(
        {scheme: local_proxy for scheme in ("http", "https")})
    opener = request.build_opener(proxy_support, proxy_auth)
    # Alternative: request.install_opener(opener) followed by
    # request.urlopen(...); here the opener is used directly.
    r2 = opener.open('https://www.twitter.com')
    print(r2.read(100))
def main():
    """Request a web server that needs basic auth and print the raw body."""
    name = "test"
    pswd = "123456"
    webserver = "192.168.78.1"
    # This situation is rarely needed.
    # Password manager that stores the credentials tied to HTTP requests.
    pswd_mgr = urllib2.HTTPPasswordMgrWithPriorAuth()
    # Register one account: the first argument is the realm (None when there
    # is none), followed by the site address, user name and password.
    pswd_mgr.add_password(None, webserver, name, pswd)
    # Handler for HTTP basic authentication (the class is
    # HTTPBasicAuthHandler; the misspelt name raised AttributeError).
    http_auth_handler = urllib2.HTTPBasicAuthHandler(pswd_mgr)
    # Handler for proxy basic authentication (for proxies,
    # ProxyHandler({"http": "user:pswd@ip:port"}) is more convenient).
    proxy_auth_handler = urllib2.ProxyBasicAuthHandler(pswd_mgr)
    # build_opener accepts several handlers.
    opener = urllib2.build_opener(http_auth_handler, proxy_auth_handler)
    request = urllib2.Request(f"http://{webserver}")
    response = opener.open(request)
    print(response.read())
def opener(self, purl, puser=None, ppass=None, pscheme=('http', 'https')):
    """Build an opener with cookie support and an optional proxy.

    Uses the bindable HTTP/HTTPS handlers from ``REDIRECT`` when it is
    truthy, otherwise the standard ones.  When *purl* is given it is
    registered as the proxy for every scheme in *pscheme*; when *puser* and
    *ppass* are also given, basic proxy authentication is added for *purl*.
    """
    if REDIRECT:
        http_cls, https_cls = REDIRECT.BindableHTTPHandler, REDIRECT.BindableHTTPSHandler
    else:
        http_cls, https_cls = ul2.HTTPHandler, ul2.HTTPSHandler

    chain = [http_cls(), https_cls(), ul2.HTTPCookieProcessor(self.cookies)]
    if purl:
        # One ProxyHandler mapping every requested scheme to the same proxy.
        chain.append(ul2.ProxyHandler(dict.fromkeys(pscheme, purl)))
        if puser and ppass:
            credentials = ul2.HTTPPasswordMgrWithDefaultRealm()
            credentials.add_password(None, purl, puser, ppass)
            chain.append(ul2.ProxyBasicAuthHandler(credentials))
    return ul2.build_opener(*chain)
def getReq():
    """Install the global proxy opener once; later calls return immediately.

    Uses the module-level flag ``init`` to remember that setup already ran.
    """
    global init
    if init:
        return

    print('start setting the proxy! ')
    user = '******'
    passwd = 'maxzhang3'
    url = 'proxy.ict:8080'
    # Credentials are embedded in the proxy URL itself.
    proxy_url = 'http://%s:%s@%s' % (user, passwd, url)
    proxy_support = request.ProxyHandler({'http': proxy_url})

    passwdmgr = request.HTTPPasswordMgrWithDefaultRealm()
    passwdmgr.add_password(None, 'hq.sinajs.cn', 'aspac\\axesr', 'maxzhang3')
    from ntlm import HTTPNtlmAuthHandler
    # NOTE(review): the two auth handlers below are created but never passed
    # to build_opener -- confirm whether they were meant to be installed.
    auth_NTLM = HTTPNtlmAuthHandler.ProxyNtlmAuthHandler(passwdmgr)
    proxy_auth_handler = request.ProxyBasicAuthHandler(passwdmgr)

    request.install_opener(request.build_opener(proxy_support))
    init = True
def soup1(url):
    """Fetch *url* through an authenticated proxy and return the parsed soup.

    The proxy password is prompted for on the terminal.  Any exception is
    printed and ``None`` is returned.
    """
    try:
        username = '******'
        # NOTE(review): the end of this statement and the `proxy` assignment
        # were lost to credential masking in the source; they are restored
        # here from how `password` and `proxy` are used below -- confirm.
        password = getpass.getpass('input password:')
        proxy = 'www.someproxyaddress.com:8080'
        proxy_handler = u.ProxyHandler({'http': proxy})
        password_mgr = u.HTTPPasswordMgrWithDefaultRealm()
        proxy_auth_handler = u.ProxyBasicAuthHandler(password_mgr)
        # add_password on the handler forwards to the password manager.
        proxy_auth_handler.add_password(None, 'http://%s' % proxy,
                                        username, password)
        opener = u.build_opener(proxy_handler, proxy_auth_handler)
        req = u.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        r = opener.open(req)
        result = r.read()
        soup = bs(result, 'html.parser')
        print(url)
        return soup
    except Exception as e:
        print(e)
# Log in to weibo.cn with credentials read from the terminal, print the
# response, then show how to open a URL through an authenticated proxy.
email = input('Email: ')
# NOTE(review): the end of this statement and the start of the login_data
# assignment were lost to credential masking in the source; they are
# restored from the later login_data.encode(...) use -- confirm that
# `parse` (urllib.parse) is the module in scope.
passwd = input('Password: ')
login_data = parse.urlencode([
    ('username', email),
    ('password', passwd),
    ('entry', 'mweibo'),
    ('client_id', ''),
    ('savestate', '1'),
    ('ec', ''),
    ('pagerefer', 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F%3Fjumpfrom%3Dweibocom&jumpfrom=weibocom')
])

req = request.Request('https://passport.weibo.cn/sso/login')
req.add_header('Origin', 'https://passport.weibo.cn')
req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25')
req.add_header('Referer', 'https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F%3Fjumpfrom%3Dweibocom')

# Passing data makes this a POST request.
with request.urlopen(req, data=login_data.encode('utf-8')) as f:
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('%s: %s' % (k, v))
    print('Data:', f.read().decode('utf-8'))

# with proxy and proxy auth:
proxy_handler = request.ProxyHandler({'http': 'http://www.example.com:3128/'})
proxy_auth_handler = request.ProxyBasicAuthHandler()
proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
opener = request.build_opener(proxy_handler, proxy_auth_handler)
with opener.open('http://www.example.com/login.html') as f:
    pass
# 创一个cookie处理器 cookies = request.HTTPCookieProcessor(cookie) # 以它为参数,创建opener对象 opener = request.build_opener(cookies) # 使用这个opener 来发请求 res =opener.open('https://www.baidu.com/') print(cookies.cookiejar) #3. 设置代理 #运行爬虫的时候,经常会出现被封IP的情况,这时我们就需要使用ip代理来处理,urllib的IP代理的设置如下: url ='http://httpbin.org/ip' #代理地址 proxy ={'http':'172.0.0.1:3128'} # 代理处理器 proxies =request.ProxyBasicAuthHandler(proxy) # 创建opener对象 opener = request.build_opener(proxies) re =opener.open(url) print(re.read().decode()) """ urlib库中的类或或者方法,在发送网络请求后,都会返回一个urllib.response的对象。 它包含了请求回来的数据结果。它包含了一些属性和方法,供我们处理返回的结果 read() 获取响应返回的数据,只能用一次 readline() 读取一行 info() 获取响应头信息 geturl() 获取访问的url getcode() 返回状态码
from urllib import request

# request.HTTPPasswordMgrWithDefaultRealm: password manager object that
#     stores the authorization data tied to HTTP requests
# request.ProxyBasicAuthHandler: handler for proxy authorization
# request.HTTPBasicAuthHandler: handler for web-client authorization

usn = 'test'
pwd = '123456'
web_server = '122.114.31.177:808'

# Build the password manager and register the authorized user.
password_mgr = request.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, web_server, usn, pwd)

# Auth handler built from the password manager.  Note: this one handles the
# web server's authentication, not the proxy server's.
httpauth_handler = request.HTTPBasicAuthHandler(password_mgr)
# This is the handler for proxy-server authentication; both can be passed
# to build_opener together.
proxyauth_handler = request.ProxyBasicAuthHandler(password_mgr)

opener = request.build_opener(httpauth_handler, proxyauth_handler)
request_obj = request.Request(f'http://{web_server}')
response = opener.open(request_obj)
print(response.read())
('client_id',''), ('savestate','1'), ('ec',''), ('pagerefer', 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F') ]) print(login_data) req = request.Request('https://passport.weibo.cn/sso/login') req.add_header('Origin', 'https://passport.weibo.cn') req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25') req.add_header('Referer', 'https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F') # 开始请求 with request.urlopen(req,data=login_data.encode('utf-8')) as f: print('Status:',f.status,f.reason) for k,v in f.getheaders(): print('%s:%s' % (k,v)) print('Data:',f.read().decode('utf-8')) ''' ''' Handler - 通过一个proxy去访问网站 ''' proxyHandler = request.ProxyHandler({'http': 'http://dev-proxy.oa.com:8080'}) proxyAuthHandler = request.ProxyBasicAuthHandler() proxyAuthHandler.add_password('realm', 'host', 'username', 'password') opener = request.build_opener(proxyHandler, proxyAuthHandler) with opener.open('http://www.qq.com') as f: print(f.read().decode('utf-8'))
from urllib import request

# Account authorized for the private proxy
user = '******'
# Password authorized for the private proxy
password = '******'
# Private proxy address
# NOTE(review): this address has only three octets -- confirm the real value.
proxyserver = '61.158.130:16816'

# Password manager that stores the user name and password.
passwdmgr = request.HTTPPasswordMgrWithDefaultRealm()
# Register the account: the first argument is the realm (usually None),
# followed by the proxy address, the user name and the password.
passwdmgr.add_password(None, proxyserver, user, password)

# Handler that answers the proxy's basic-auth challenge from the manager.
proxyauth_handler = request.ProxyBasicAuthHandler(passwdmgr)
# The auth handler does not route traffic by itself: a ProxyHandler is still
# needed so that requests actually go through the proxy.
proxy_handler = request.ProxyHandler({'http': proxyserver})

opener = request.build_opener(proxy_handler, proxyauth_handler)
request.install_opener(opener)

response = opener.open(request.Request('http://www.baidu.com/'))
print(response.read().decode('utf-8'))
print('popi')