def test_proxy_new(self, proxy):
    try:
        starttime = datetime.datetime.now()
        typestr = 'https' if proxy.find('https') >= 0 else 'http'
        proxy_handler = ProxyHandler({typestr: proxy})
        opener = build_opener(proxy_handler)
        # 3-second timeout
        url = "%s://%s" % (typestr, VALIDATOR_BASE_URL)
        reqs = opener.open(url, timeout=3)
        logger.info("ip: {}".format(reqs.read()))
        endtime = datetime.datetime.now()
        # elapsed time in microseconds
        speed = (endtime - starttime).seconds * 1000000 + (
            endtime - starttime).microseconds
        self.sqlite3.update_proxy_speed(proxy, speed / 1000000)
        if reqs.status == 200:
            self.sqlite3.increase_proxy_score(proxy)
            logger.info("Validator √ {} | speed {}".format(
                proxy, speed / 1000000))
        else:
            self.sqlite3.reduce_proxy_score(proxy)
            logger.info("Validator × {}".format(proxy))
    except Exception:
        self.sqlite3.reduce_proxy_score(proxy)
        logger.info("Validator × {}".format(proxy))
def __init__(self, _=None):
    """Build self._urllib_opener"""
    proxy = super().__str__()
    if proxy == "None":
        self._urllib_opener = build_opener()
        return
    components = list(re.match(self._match_regexp, proxy).groups())
    self.scheme, self.host, self.port = components
    self.components = components
    if self.scheme == "socks4":
        socks4_handler = SocksiPyHandler(socks.PROXY_TYPE_SOCKS4,
                                         self.host, int(self.port))
        self._urllib_opener = build_opener(socks4_handler)
    elif self.scheme == "socks5":
        socks5_handler = SocksiPyHandler(socks.PROXY_TYPE_SOCKS5,
                                         self.host, int(self.port))
        self._urllib_opener = build_opener(socks5_handler)
    else:
        proxy_handler = ProxyHandler({'http': proxy, 'https': proxy})
        self._urllib_opener = build_opener(proxy_handler)
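# For context: the constructor above assumes self._match_regexp splits a
# proxy string of the form "scheme://host:port" into exactly three groups.
# A minimal sketch of such a pattern (hypothetical; the real attribute is
# defined elsewhere in the class):
#
#   _match_regexp = r'(\w+)://([^:/]+):(\d+)'
#
# With it, "socks5://127.0.0.1:1080" yields ('socks5', '127.0.0.1', '1080'),
# which maps onto self.scheme, self.host and self.port.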
def __init__(self):
    # error message
    self.error = None
    # establish connection
    self.session = build_opener()
    # add proxy handler if needed
    if config['proxy']:
        if any(config['proxies'].values()):
            self.session.add_handler(ProxyHandler(config['proxies']))
            logger.debug("Proxy is set!")
        else:
            self.error = "Proxy enabled, but not set!"
    # change user-agent
    self.session.addheaders = [('User-Agent', config['ua'])]
    # load local cookies
    mcj = MozillaCookieJar()
    try:
        mcj.load(FILE_C, ignore_discard=True)
        key = 'phpbb2mysql_4_data'
        if any(c.name == key and c.expires > time.time() for c in mcj):
            logger.info("Local cookies loaded")
            self.session.add_handler(HTTPCookieProcessor(mcj))
        else:
            logger.info("Local cookies expired or bad")
            logger.debug(f"Cookies we have: {[cookie for cookie in mcj]}")
            mcj.clear()
            self.login(mcj)
    except FileNotFoundError:
        self.login(mcj)
def set_proxy(proxy, user=None, password=""):
    """
    Set the HTTP proxy for Python to download through.

    If ``proxy`` is None then tries to set proxy from environment or system
    settings.

    :param proxy: The HTTP proxy server to use. For example:
        'http://proxy.example.com:3128/'
    :param user: The username to authenticate with. Use None to disable
        authentication.
    :param password: The password to authenticate with.
    """
    if proxy is None:
        # Try and find the system proxy settings
        try:
            proxy = getproxies()["http"]
        except KeyError as e:
            raise ValueError("Could not detect default proxy settings") from e

    # Set up the proxy handler
    proxy_handler = ProxyHandler({"https": proxy, "http": proxy})
    opener = build_opener(proxy_handler)

    if user is not None:
        # Set up basic proxy authentication if provided
        password_manager = HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(realm=None, uri=proxy, user=user,
                                      passwd=password)
        opener.add_handler(ProxyBasicAuthHandler(password_manager))
        opener.add_handler(ProxyDigestAuthHandler(password_manager))

    # Override the existing url opener
    install_opener(opener)
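# A minimal usage sketch for set_proxy() above (proxy address and
# credentials are placeholders, not real endpoints). Because the function
# calls install_opener(), plain urlopen() calls afterwards are routed
# through the proxy:
#
#   set_proxy('http://proxy.example.com:3128/', user='alice', password='secret')
#   from urllib.request import urlopen
#   html = urlopen('http://example.com/').read()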
def recaptcha_request(params):
    request_object = Request(
        url="https://{0}/".format(
            getattr(settings, "DRF_RECAPTCHA_DOMAIN", DEFAULT_RECAPTCHA_DOMAIN)),
        data=params,
        headers={
            "Content-type": "application/x-www-form-urlencoded",
            "User-agent": "DRF reCAPTCHA",
        },
    )

    # Add proxy values to opener if needed.
    opener_args = []
    proxies = getattr(settings, "DRF_RECAPTCHA_PROXY", {})
    if proxies:
        opener_args = [ProxyHandler(proxies)]
    opener = build_opener(*opener_args)

    # Get response from POST to Google endpoint.
    return opener.open(
        request_object,
        timeout=getattr(settings, "DRF_RECAPTCHA_VERIFY_REQUEST_TIMEOUT", 10),
    )
def download(url, headers, proxy, num_retries, data=None):
    print('Downloading:', url)
    request = Request(url, data, headers)
    opener = build_opener()
    if proxy:
        proxy_params = {urllib.parse.urlparse(url).scheme: proxy}
        opener.add_handler(ProxyHandler(proxy_params))
    try:
        response = opener.open(request)
        html = response.read()
        code = response.code
    except URLError as e:
        print('Download error:', e.reason)
        html = ''
        if hasattr(e, 'code'):
            code = e.code
            if num_retries > 0 and 500 <= code < 600:
                # retry 5XX HTTP errors
                html = download(url, headers, proxy, num_retries - 1, data)
        else:
            code = None
    if isinstance(html, bytes):
        html = html.decode('utf8')
    return html
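# Example call for download() above (hedged: the URL and header value are
# illustrative). A 5XX response triggers up to num_retries recursive
# retries before giving up and returning an empty string:
#
#   html = download('http://httpbin.org/get',
#                   headers={'User-Agent': 'wswp'},
#                   proxy=None, num_retries=2)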
def make_http_call(callname, data):
    """Make an IPC call via HTTP and wait for it to return.
    The contents of data will be expanded to kwargs and passed into the
    target function."""
    from collections import OrderedDict
    try:  # Python 3
        from urllib.request import build_opener, install_opener, ProxyHandler, urlopen
        from urllib.error import URLError
    except ImportError:  # Python 2
        from urllib2 import build_opener, install_opener, ProxyHandler, URLError, urlopen
    import json
    debug('Handling HTTP IPC call to {}'.format(callname))
    # don't use proxy for localhost
    url = 'http://127.0.0.1:{}/{}'.format(
        g.LOCAL_DB.get_value('ns_service_port', 8001), callname)
    install_opener(build_opener(ProxyHandler({})))
    try:
        result = json.loads(
            urlopen(url=url, data=json.dumps(data).encode('utf-8'),
                    timeout=16).read(),
            object_pairs_hook=OrderedDict)
    except URLError:
        raise BackendNotReady
    _raise_for_error(callname, result)
    return result
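# Note on the ProxyHandler({}) line above: passing an empty dict disables
# proxy autodetection from the *_proxy environment variables, so the
# loopback IPC request is never sent through a system proxy. The same
# one-liner works for any localhost call:
#
#   install_opener(build_opener(ProxyHandler({})))  # bypass proxies globally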
def curl(self, url, params=None, timeout=None):
    queryString = self.buildQS(params)
    self.traceField("URL", url)
    self.traceField("postData", queryString)
    proto = "https" if self._useHTTPS else "http"
    if self._proxy:
        opener = build_opener(
            HTTPHandler(), ValidHTTPSHandler(),
            ProxyHandler({proto: self._proxy}))
    else:
        opener = build_opener(HTTPHandler(), ValidHTTPSHandler())
    queryString = queryString.encode('utf-8')
    currentOpener = urllib.request._opener if PY_3 else urllib2._opener
    install_opener(opener)
    if timeout:
        response = urlopen(url, queryString, timeout)
    else:
        response = urlopen(url, queryString)
    install_opener(currentOpener)
    result = response.read()
    result = result.decode('utf-8')
    return result
def main():
    arg_parser()

    if not args.debug:
        logging.root.setLevel(logging.INFO)
    else:
        logging.root.setLevel(logging.DEBUG)

    if args.timeout:
        socket.setdefaulttimeout(args.timeout)

    if args.insecure:
        ssl._create_default_https_context = ssl._create_unverified_context
        args.certs = ssl.CERT_NONE
    else:
        certs = args.append_certs or []
        try:
            import certifi
        except ImportError:
            pass
        else:
            certs.append(certifi.where())
        if certs:
            context = ssl._create_default_https_context()
            for cert in certs:
                if os.path.isfile(cert):
                    context.load_verify_locations(cert)
                elif os.path.isdir(cert):
                    context.load_verify_locations(capath=cert)
            https_handler = HTTPSHandler(context=context)
            http.add_default_handler(https_handler)
            args.certs = certs
        else:
            args.certs = None

    proxies = None
    if args.proxy == 'system':
        proxies = getproxies()
        args.proxy = proxies.get('http') or proxies.get('https', 'none')
    args.proxy = args.proxy.lower()
    if not args.proxy.startswith(('http', 'socks', 'none')):
        args.proxy = 'http://' + args.proxy
    if args.proxy == 'none':
        proxies = {}
    elif args.proxy.startswith(('http', 'socks')):
        if args.proxy.startswith(('https', 'socks')):
            try:
                import extproxy
            except ImportError:
                logger.error('Please install ExtProxy to use proxy: ' + args.proxy)
                raise
        proxies = {
            'http': args.proxy,
            'https': args.proxy
        }
    proxy_handler = ProxyHandler(proxies)
    http.add_default_handler(proxy_handler)

    if args.no_http_cache:
        http.CACHED.set(0)

    # mkdir and cd to output dir
    if not args.output_dir == '.':
        try:
            if not os.path.exists(args.output_dir):
                os.makedirs(args.output_dir)
        except Exception:
            logger.warning('No permission or not found: ' + args.output_dir)
            logger.warning('using current folder')
            args.output_dir = '.'
    if os.path.exists(args.output_dir):
        os.chdir(args.output_dir)

    exit = 0
    try:
        for url in args.video_urls:
            http.reset_headers()
            http.uninstall_cookie()
            try:
                m, u = url_to_module(url)
                if args.playlist:
                    parser = m.parser_list
                    m.start = args.start
                else:
                    parser = m.parser
                info = parser(u)
                if isinstance(info, (GeneratorType, list)):
                    for i in info:
                        handle_videoinfo(i)
                else:
                    handle_videoinfo(info)
            except AssertionError as e:
                logger.critical(str(e))
                exit = 1
            except (RuntimeError, NotImplementedError, SyntaxError) as e:
                logger.error(repr(e))
                exit = 1
    except KeyboardInterrupt:
        logger.info('Interrupted by Ctrl-C')
    except Exception as e:
        errmsg = str(e)
        logger.debug(errmsg, exc_info=True)
        if 'local issuer' in errmsg:
            logger.warning('Please install or update Certifi, and try again:\n'
                           'pip3 install certifi --upgrade')
        exit = 255

    sys.exit(exit)
def installproxy():
    print('Proxy opened. IP Address: 202.112.26.250')
    proxy_support = ProxyHandler({'http': '202.112.26.250:8080'})
    opener = build_opener(proxy_support)
    install_opener(opener)
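# After installproxy() runs, the opener is installed globally, so a plain
# urlopen() goes through the proxy (sketch only; the proxy address hardcoded
# above may no longer be live):
#
#   installproxy()
#   from urllib.request import urlopen
#   print(urlopen('http://httpbin.org/ip').read().decode())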
from urllib.request import Request, build_opener
from fake_useragent import UserAgent
from urllib.request import ProxyHandler

url = "http://httpbin.org/get"
headers = {"User-Agent": UserAgent().chrome}
request = Request(url, headers=headers)

# Authenticated proxy template:
# handler = ProxyHandler({"http": "username:password@ip:port"})
handler = ProxyHandler({"http": "398707160:[email protected]:16818"})
# Open proxy template:
# handler = ProxyHandler({"http": "ip:port"})
handler = ProxyHandler({"http": "118.190.95.43:9001"})  # overrides the handler above

opener = build_opener(handler)
response = opener.open(request)
print(response.read().decode())
def request(url, close=True, redirect=True, error=False, proxy=None,
            post=None, headers=None, mobile=False, limit=None, referer=None,
            cookie=None, output='', timeout='30'):
    handlers = []
    if proxy is not None:
        handlers += [ProxyHandler({'http': '{0}'.format(proxy)}),
                     urllib_request.HTTPHandler]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)
    if output == 'cookie' or output == 'extended' or close is not True:
        cookies = cookielib.LWPCookieJar()
        handlers += [urllib_request.HTTPHandler(),
                     urllib_request.HTTPSHandler(),
                     urllib_request.HTTPCookieProcessor(cookies)]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)
    try:
        if sys.version_info < (2, 7, 9):
            raise Exception()
        import ssl
        ssl_context = ssl.create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE
        handlers += [urllib_request.HTTPSHandler(context=ssl_context)]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)
    except Exception:
        pass

    # make sure headers is a dict
    try:
        headers.update(headers)
    except Exception:
        headers = {}
    if 'User-Agent' in headers:
        pass
    elif mobile is not True:
        # headers['User-Agent'] = agent()
        headers['User-Agent'] = cache.get(randomagent, 1)
    else:
        headers['User-Agent'] = 'Apple-iPhone/701.341'
    if 'Referer' in headers:
        pass
    elif referer is None:
        headers['Referer'] = '%s://%s/' % (urlparse(url).scheme, urlparse(url).netloc)
    else:
        headers['Referer'] = referer
    if 'Accept-Language' not in headers:
        headers['Accept-Language'] = 'en-US'
    if 'Cookie' in headers:
        pass
    elif cookie is not None:
        headers['Cookie'] = cookie

    if redirect is False:
        # pass redirect responses through unchanged instead of following them
        class NoRedirection(urllib_request.HTTPErrorProcessor):
            def http_response(self, request, response):
                return response

            https_response = http_response

        opener = urllib_request.build_opener(NoRedirection)
        urllib_request.install_opener(opener)
        try:
            del headers['Referer']
        except Exception:
            pass

    req = urllib_request.Request(url, data=post, headers=headers)
    try:
        response = urllib_request.urlopen(req, timeout=int(timeout))
    except urllib_error.HTTPError as response:
        if response.code == 503:
            if 'cf-browser-verification' in response.read(5242880):
                netloc = '%s://%s' % (urlparse(url).scheme, urlparse(url).netloc)
                cf = cache.get(cfcookie, 168, netloc, headers['User-Agent'], timeout)
                headers['Cookie'] = cf
                request = urllib_request.Request(url, data=post, headers=headers)
                response = urllib_request.urlopen(request, timeout=int(timeout))
            elif error is False:
                return
        elif error is False:
            return

    if output == 'cookie':
        try:
            result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
        except Exception:
            pass
        try:
            result = cf
        except Exception:
            pass
    elif output == 'response':
        if limit == '0':
            result = (str(response.code), response.read(224 * 1024))
        elif limit is not None:
            result = (str(response.code), response.read(int(limit) * 1024))
        else:
            result = (str(response.code), response.read(5242880))
    elif output == 'chunk':
        try:
            content = int(response.headers['Content-Length'])
        except Exception:
            content = (2049 * 1024)
        if content < (2048 * 1024):
            return
        result = response.read(16 * 1024)
    elif output == 'extended':
        try:
            cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
        except Exception:
            pass
        try:
            cookie = cf
        except Exception:
            pass
        content = response.headers
        result = response.read(5242880)
        return result, headers, content, cookie
    elif output == 'geturl':
        result = response.geturl()
    elif output == 'headers':
        content = response.headers
        return content
    else:
        if limit == '0':
            result = response.read(224 * 1024)
        elif limit is not None:
            result = response.read(int(limit) * 1024)
        else:
            result = response.read(5242880)

    if close is True:
        response.close()
    return result
import random
from urllib.request import ProxyHandler, build_opener, install_opener, urlopen, Request

# 1. The argument is a dict mapping scheme to proxy address
proxy_handler = ProxyHandler(
    {"http": "1050521852:[email protected]:16816"})
# 2. Create a custom opener
opener = build_opener(proxy_handler)
# Add header info; it takes a list of tuples
opener.addheaders = [(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
)]
# 3. Either call opener.open() to open the url with this opener only
# opener.open(req)
# 3. or install a global opener; once installed, urlopen() uses it by default
install_opener(opener)
url = 'http://www.ipdizhichaxun.com/'
req = Request(url)
response = urlopen(req)
html = response.read().decode('utf-8')
print(html)
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Maxthon2.0)',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TencentTraveler4.0)',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)'
]

# A pool of proxy IPs
ip_list = [
    {'http': 'http://111.155.116.234:8123'},
    {'http': 'http://183.167.217.152:63000'},
    {'http': 'http://110.73.9.193:8123'},
]
# Request headers with a random user agent
headers = {'User-Agent': random.choice(user_agent_list)}
# Proxy handler built from a randomly chosen proxy
proxy_handler = ProxyHandler(random.choice(ip_list))


# Function that crawls qiushibaike.com
def download_qsbk(page):
    # Build the full page URL
    abs_url = base_url + "page/" + str(page) + "/"
    # Create a request object
    request = urllib.request.Request(abs_url, headers=headers)
    # Build an opener from the proxy handler
    opener = build_opener(proxy_handler)
    # Visit the URL and fetch the page source
    response = opener.open(request).read().decode('utf-8')
    pattern_obj = re.compile(
        r'<div class="author clearfix">.*?<h2>(.*?)</h2>.*?class="articleGender .*?Icon">(.*?)</div>.*?<div class="content">.*?<span>(.*?)</span>.*?<div class="stats">.*?<i class="number">(.*?)</i>.*?<i class="number">(.*?)</i>',
        re.S)
from urllib.error import URLError
from urllib.request import ProxyHandler, build_opener

proxy = '111.155.116.245:8123'
proxy_handler = ProxyHandler({
    'http': 'http://' + proxy,
    'https': 'https://' + proxy
})
opener = build_opener(proxy_handler)
try:
    response = opener.open('http://httpbin.org/get')
    print(response.read().decode('utf-8'))
except URLError as e:
    print(e.reason)
def set_proxies(proxies):
    install_opener(build_opener(ProxyHandler(proxies), CacheFTPHandler))
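# Usage sketch for set_proxies() (addresses are placeholders). The mapping
# follows the getproxies() convention of scheme -> proxy URL; CacheFTPHandler
# additionally keeps a cache of open FTP connections between requests:
#
#   set_proxies({'http': 'http://10.0.0.1:3128',
#                'https': 'http://10.0.0.1:3128'})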
    # Does the refresh time follow daylight saving time?
    now = time() + timezone_PST
    refreshtime = strftime('%y %j', localtime(now + 86400))
    refreshtime = mktime(strptime(refreshtime, '%y %j'))
    return refreshtime - now


nappid = 0
nLock = threading.Lock()
badappids = LRUCache(len(GC.GAE_APPIDS))
qGAE = Queue.LifoQueue()
for _ in range(GC.GAE_MAXREQUESTS * len(GC.GAE_APPIDS)):
    qGAE.put(True)
proxy_server = 'http://127.0.0.1:%d' % GC.LISTEN_AUTO_PORT
proxy_handler = ProxyHandler({'http': proxy_server, 'https': proxy_server})
context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
context.verify_mode = ssl.CERT_NONE
https_handler = HTTPSHandler(context=context)
proxy_opener = build_opener(proxy_handler, https_handler)


def check_appid_exists(appid):
    response = proxy_opener.open('https://%s.appspot.com/' % appid)
    return response.status == 200


def get_appid():
    global nappid
    with nLock:
        while True:
def open_url(url, data=None, headers=None, method=None, cookiejar=None,
             follow_redirects=True, raise_errors=None):
    """Return a urllib http response"""
    try:  # Python 3
        from urllib.error import HTTPError, URLError
        from urllib.parse import unquote
        from urllib.request import build_opener, HTTPCookieProcessor, ProxyHandler, Request
    except ImportError:  # Python 2
        from urllib2 import build_opener, HTTPError, HTTPCookieProcessor, ProxyHandler, Request, URLError, unquote

    opener_args = []
    if not follow_redirects:
        opener_args.append(NoRedirection)
    if cookiejar is not None:
        opener_args.append(HTTPCookieProcessor(cookiejar))
    proxies = get_proxies()
    if proxies:
        opener_args.append(ProxyHandler(proxies))
    opener = build_opener(*opener_args)

    if not headers:
        headers = dict()
    req = Request(url, headers=headers)
    if data is not None:
        req.data = data
        log(2, 'URL post: {url}', url=unquote(url))
        # Make sure we don't log the password
        debug_data = data
        if 'password' in to_unicode(debug_data):
            debug_data = '**redacted**'
        log(2, 'URL post data: {data}', data=debug_data)
    else:
        log(2, 'URL get: {url}', url=unquote(url))
    if method is not None:
        req.get_method = lambda: method

    if raise_errors is None:
        raise_errors = list()
    try:
        return opener.open(req)
    except HTTPError as exc:
        if isinstance(raise_errors, list) and 401 in raise_errors or raise_errors == 'all':
            raise
        if hasattr(req, 'selector'):  # Python 3.4+
            url_length = len(req.selector)
        else:  # Python 2.7
            url_length = len(req.get_selector())
        if exc.code == 400 and 7600 <= url_length <= 8192:
            ok_dialog(heading='HTTP Error 400', message=localize(30967))
            log_error('HTTP Error 400: Probably exceeded maximum url length: '
                      'VRT Search API url has a length of {length} characters.',
                      length=url_length)
            return None
        if exc.code == 413 and url_length > 8192:
            ok_dialog(heading='HTTP Error 413', message=localize(30967))
            log_error('HTTP Error 413: Exceeded maximum url length: '
                      'VRT Search API url has a length of {length} characters.',
                      length=url_length)
            return None
        if exc.code == 431:
            ok_dialog(heading='HTTP Error 431', message=localize(30967))
            log_error('HTTP Error 431: Request header fields too large: '
                      'VRT Search API url has a length of {length} characters.',
                      length=url_length)
            return None
        if exc.code == 401:
            ok_dialog(heading='HTTP Error {code}'.format(code=exc.code),
                      message='{}\n{}'.format(url, exc.reason))
            log_error('HTTP Error {code}: {reason}', code=exc.code, reason=exc.reason)
            return None
        if exc.code in (400, 403) and exc.headers.get('Content-Type') \
                and 'application/json' in exc.headers.get('Content-Type'):
            return exc
        ok_dialog(heading='HTTP Error {code}'.format(code=exc.code),
                  message='{}\n{}'.format(url, exc.reason))
        log_error('HTTP Error {code}: {reason}', code=exc.code, reason=exc.reason)
        return None
    except URLError as exc:
        ok_dialog(heading=localize(30968), message=localize(30969))
        log_error('URLError: {error}\nurl: {url}', error=exc.reason, url=url)
        return None
    except SSLError as exc:
        # TODO: Include the error message in the notification window
        ok_dialog(heading=localize(30968), message=localize(30969))
        if hasattr(exc, 'reason'):  # Python 2.7.9+, but still failed on Python 2.7.16
            log_error('SSLError: {error} ({library})\nurl: {url}',
                      error=exc.reason, library=exc.library, url=url)
        elif isinstance(exc, list):
            log_error('SSLError: {error} ({errno})\nurl: {url}',
                      errno=exc[0], error=exc[1], url=url)
        else:
            log_error('SSLError: {error}\nurl: {url}', error=str(exc), url=url)
        return None
    except timeout as exc:
        ok_dialog(heading=localize(30968), message=localize(30969))
        log_error('Timeout: {error}\nurl: {url}', error=str(exc), url=url)
        return None
def _build_opener(apiurl):
    from osc.core import __version__
    global config
    if 'last_opener' not in _build_opener.__dict__:
        _build_opener.last_opener = (None, None)
    if apiurl == _build_opener.last_opener[0]:
        return _build_opener.last_opener[1]

    # respect no_proxy env variable
    if proxy_bypass(apiurl):
        # initialize with empty dict
        proxyhandler = ProxyHandler({})
    else:
        # read proxies from env
        proxyhandler = ProxyHandler()

    # workaround for http://bugs.python.org/issue9639
    authhandler_class = HTTPBasicAuthHandler
    if sys.version_info >= (2, 6, 6) and sys.version_info < (2, 7, 1) \
            and 'reset_retry_count' not in dir(HTTPBasicAuthHandler):
        print('warning: your urllib2 version seems to be broken. '
              'Using a workaround for http://bugs.python.org/issue9639',
              file=sys.stderr)

        class OscHTTPBasicAuthHandler(HTTPBasicAuthHandler):
            def http_error_401(self, *args):
                response = HTTPBasicAuthHandler.http_error_401(self, *args)
                self.retried = 0
                return response

            def http_error_404(self, *args):
                self.retried = 0
                return None

        authhandler_class = OscHTTPBasicAuthHandler
    elif sys.version_info >= (2, 6, 6) and sys.version_info < (2, 7, 1):
        class OscHTTPBasicAuthHandler(HTTPBasicAuthHandler):
            def http_error_404(self, *args):
                self.reset_retry_count()
                return None

        authhandler_class = OscHTTPBasicAuthHandler
    elif sys.version_info >= (2, 6, 5) and sys.version_info < (2, 6, 6):
        # workaround for broken urllib2 in python 2.6.5: wrong credentials
        # lead to an infinite recursion
        class OscHTTPBasicAuthHandler(HTTPBasicAuthHandler):
            def retry_http_basic_auth(self, host, req, realm):
                # don't retry if auth failed
                if req.get_header(self.auth_header, None) is not None:
                    return None
                return HTTPBasicAuthHandler.retry_http_basic_auth(
                    self, host, req, realm)

        authhandler_class = OscHTTPBasicAuthHandler

    options = config['api_host_options'][apiurl]
    # with None as first argument, it will always use this username/password
    # combination for urls for which arg2 (apisrv) is a super-url
    authhandler = authhandler_class(HTTPPasswordMgrWithDefaultRealm())
    authhandler.add_password(None, apiurl, options['user'], options['pass'])

    if options['sslcertck']:
        try:
            from . import oscssl
            from M2Crypto import m2urllib2
        except ImportError as e:
            print(e)
            raise NoSecureSSLError(
                'M2Crypto is needed to access %s in a secure way.\n'
                'Please install python-m2crypto.' % apiurl)
        cafile = options.get('cafile', None)
        capath = options.get('capath', None)
        if not cafile and not capath:
            for i in ['/etc/pki/tls/cert.pem', '/etc/ssl/certs']:
                if os.path.isfile(i):
                    cafile = i
                    break
                elif os.path.isdir(i):
                    capath = i
                    break
        if not cafile and not capath:
            raise oscerr.OscIOError(None, 'No CA certificates found')
        ctx = oscssl.mySSLContext()
        if ctx.load_verify_locations(capath=capath, cafile=cafile) != 1:
            raise oscerr.OscIOError(None, 'No CA certificates found')
        opener = m2urllib2.build_opener(
            ctx, oscssl.myHTTPSHandler(ssl_context=ctx, appname='osc'),
            HTTPCookieProcessor(cookiejar), authhandler, proxyhandler)
    else:
        handlers = [HTTPCookieProcessor(cookiejar), authhandler, proxyhandler]
        try:
            # disable ssl cert check in python >= 2.7.9
            ctx = ssl._create_unverified_context()
            handlers.append(HTTPSHandler(context=ctx))
        except AttributeError:
            pass
        print("WARNING: SSL certificate checks disabled. Connection is insecure!\n",
              file=sys.stderr)
        opener = build_opener(*handlers)
    opener.addheaders = [('User-agent', 'osc/%s' % __version__)]
    _build_opener.last_opener = (apiurl, opener)
    return opener
from urllib.error import URLError
from urllib.request import ProxyHandler, build_opener

# The ProxyHandler argument is a dict whose keys are protocol schemes and
# whose values are proxy URLs; multiple proxies can be added.
proxy_handler = ProxyHandler({
    'http': 'http://127.0.0.1:9743',
    'https': 'https://127.0.0.1:9743',
})
# Build an Opener from the ProxyHandler with build_opener(), then use it
# to send requests.
opener = build_opener(proxy_handler)
try:
    response = opener.open("https://www.baidu.com")
    print(response.read().decode('utf-8'))
except URLError as e:
    print(e.reason)
def _perform_http_request(
    self,
    *,
    http_verb: str = "GET",
    url: str,
    body_params: Optional[Dict[str, any]] = None,
    headers: Dict[str, str],
) -> AuditLogsResponse:
    if body_params is not None:
        body_params = json.dumps(body_params)
        headers["Content-Type"] = "application/json;charset=utf-8"

    if self.logger.level <= logging.DEBUG:
        headers_for_logging = {
            k: "(redacted)" if k.lower() == "authorization" else v
            for k, v in headers.items()
        }
        self.logger.debug(
            f"Sending a request - url: {url}, body: {body_params}, headers: {headers_for_logging}"
        )

    try:
        opener: Optional[OpenerDirector] = None
        # for security (BAN-B310)
        if url.lower().startswith("http"):
            req = Request(
                method=http_verb,
                url=url,
                data=body_params.encode("utf-8") if body_params is not None else None,
                headers=headers,
            )
            if self.proxy is not None:
                if isinstance(self.proxy, str):
                    opener = urllib.request.build_opener(
                        ProxyHandler({"http": self.proxy, "https": self.proxy}),
                        HTTPSHandler(context=self.ssl),
                    )
                else:
                    raise SlackRequestError(
                        f"Invalid proxy detected: {self.proxy} must be a str value"
                    )
        else:
            raise SlackRequestError(f"Invalid URL detected: {url}")

        # NOTE: BAN-B310 is already checked above
        resp: Optional[HTTPResponse] = None
        if opener:
            resp = opener.open(req, timeout=self.timeout)  # skipcq: BAN-B310
        else:
            resp = urlopen(  # skipcq: BAN-B310
                req, context=self.ssl, timeout=self.timeout)
        charset: str = resp.headers.get_content_charset() or "utf-8"
        response_body: str = resp.read().decode(charset)
        resp = AuditLogsResponse(
            url=url,
            status_code=resp.status,
            raw_body=response_body,
            headers=resp.headers,
        )
        _debug_log_response(self.logger, resp)
        return resp
    except HTTPError as e:
        # read the response body here
        charset = e.headers.get_content_charset() or "utf-8"
        body_params: str = e.read().decode(charset)
        resp = AuditLogsResponse(
            url=url,
            status_code=e.code,
            raw_body=body_params,
            headers=e.headers,
        )
        if e.code == 429:
            # for backward-compatibility with WebClient (v.2.5.0 or older)
            resp.headers["Retry-After"] = resp.headers["retry-after"]
        _debug_log_response(self.logger, resp)
        return resp
    except Exception as err:
        self.logger.error(f"Failed to send a request to Slack API server: {err}")
        raise err
from urllib.request import ProxyHandler, build_opener
from bs4 import BeautifulSoup

proxy = ProxyHandler({'https': 'https://127.0.0.1:1080'})
opener = build_opener(proxy)
from urllib.request import Request, build_opener, ProxyHandler
from fake_useragent import UserAgent

url = 'https://www.kxdao.net/forum.php?mod=forumdisplay&fid=119'
headers = {
    'User-Agent': UserAgent().random,
    'Cookie': 'G1NZ_2132_saltkey=k9V77i9E; G1NZ_2132_lastvisit=1594960092; G1NZ_2132_atarget=1; G1NZ_2132_client_created=1594979291; G1NZ_2132_client_token=DB569F65958A1CFBCBDEFE8C5806373C; G1NZ_2132_auth=9ae4yhyzOF7Ua%2FLI59CXlkrWJWKKf6tCBw%2FKysoGsZ9PVELttpsJF3K1InoYR2U7uCRldyB49Tqghmvtq%2F4vR9EehA; G1NZ_2132_connect_login=1; G1NZ_2132_connect_is_bind=1; G1NZ_2132_connect_uin=DB569F65958A1CFBCBDEFE8C5806373C; G1NZ_2132_stats_qc_login=3; G1NZ_2132_myrepeat_rr=R0; G1NZ_2132_nofavfid=1; G1NZ_2132_smile=5D1; G1NZ_2132_sid=yYYJly; G1NZ_2132_lip=119.164.125.140%2C1594979291; G1NZ_2132_popadv=a%3A0%3A%7B%7D; G1NZ_2132_ulastactivity=8814vrAwXblPQ9dasW80eqeAzs6SXQZ4Hw5cA76%2BAt7ikmOwsuC6; G1NZ_2132_sendmail=1; PHPSESSID=cl127sn0td4smbb13t6hcrne74; Hm_lvt_2b441fdd1b590975e0e2d2a00d32226c=1594978892,1595057053; G1NZ_2132_dsu_amuppered=40001; G1NZ_2132_dsu_amupper=DQo8c3R5bGU%2BDQoucHBlcndibSB7cGFkZGluZzo2cHggMTJweDtib3JkZXI6MXB4IHNvbGlkICNDRENEQ0Q7YmFja2dyb3VuZDojRjJGMkYyO2xpbmUtaGVpZ2h0OjEuOGVtO2NvbG9yOiMwMDMzMDA7d2lkdGg6MjAwcHg7b3ZlcmZsb3c6aGlkZGVufQ0KLnBwZXJ3Ym0gLnRpbWVze2NvbG9yOiNmZjk5MDA7fQ0KLnBwZXJ3Ym0gIGF7ZmxvYXQ6cmlnaHQ7Y29sb3I6I2ZmMzMwMDt0ZXh0LWRlY29yYXRpb246bm9uZX0NCjwvc3R5bGU%2BDQoNCjxkaXYgY2xhc3M9InBwZXJ3Ym0iIGlkPSJwcGVyd2JfbWVudSIgc3R5bGU9ImRpc3BsYXk6IG5vbmUiID4NCjxBIEhSRUY9InBsdWdpbi5waHA%2FaWQ9ZHN1X2FtdXBwZXI6cHBlcmxpc3QiIHRhcmdldD0iX2JsYW5rIj7mn6XnnIvnrb7liLDmjpLooYw8L0E%2BDQo8c3Ryb25nPue0r%2BiuoeetvuWIsDxzcGFuIGNsYXNzPSJ0aW1lcyI%2BMzY1PC9zcGFuPuasoTwvc3Ryb25nPjxicj4NCg0KPHN0cm9uZz7ov57nu63nrb7liLA8c3BhbiBjbGFzcz0idGltZXMiPjE8L3NwYW4%2B5qyhPC9zdHJvbmc%2BPGJyPg0KDQo8c3Ryb25nPuS4iuasoeetvuWIsDogPHNwYW4gY2xhc3M9InRpbWVzIj4yMDIwLTA3LTE4IDE1OjI0OjEzPC9zcGFuPjwvc3Ryb25nPg0KPC9kaXY%2BDQo%3D; G1NZ_2132_visitedfid=119D55D46D47; G1NZ_2132_st_t=40001%7C1595057262%7C3a9b077c73260e86bb48dfad3a200b35; G1NZ_2132_forum_lastvisit=D_47_1594978896D_46_1594978904D_55_1594980257D_119_1595057262; Hm_lpvt_2b441fdd1b590975e0e2d2a00d32226c=1595057264; G1NZ_2132_lastact=1595057263%09home.php%09spacecp; G1NZ_2132_checkpm=1; G1NZ_2132_lastcheckfeed=40001%7C1595057263; G1NZ_2132_checkfollow=1'
}
request = Request(url, headers=headers)
handler = ProxyHandler({'http': '120.198.76.45:41443'})
opener = build_opener(handler)
response = opener.open(request)
print(response.read().decode())
    data = html.xpath('(//table[contains(@class, "wikitable sortable"' +
                      ')])[1]/tbody/tr[2]/td//text()')
    data_header = html.xpath('(//table[contains(@class, "wikitable sortable"' +
                             ')])[1]/tbody/tr/th/text()')
    data = proc_data(data, data_header, country)
    if data.size == 5:
        return '"wikitable sortable"'
    else:
        return '"wikitable"'


# ---1.1 preparation---
# set proxy
proxy_handler = ProxyHandler({
    'http': 'http://127.0.0.1:57858',
    'https': 'https://127.0.0.1:57858'
})
opener = build_opener(proxy_handler)
install_opener(opener)
# get and set page url
publicURL = 'https://en.wikipedia.org'
year = '2008'
url = 'https://en.wikipedia.org/wiki/' + year + '_Summer_Olympics_medal_table'
# set UA, disguise crawler as browser
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
                  "AppleWebKit/537.36 (KHTML, like Gecko) " +
                  "Chrome/77.0.3865.90 Safari/537.36"
}
def __init__(self):
    ''' Initialize resumepoints, relies on XBMC vfs and a special VRT token '''
    self._resumepoints = dict()  # Our internal representation
    install_opener(build_opener(ProxyHandler(get_proxies())))
# -*- coding: UTF-8 -*-
# Module that handles crawler requests and sorting
from urllib.request import Request
from urllib.request import urlopen
from urllib.request import ProxyHandler
from urllib.request import build_opener
from urllib.request import install_opener
import requests
import conf

proxy_info = {'host': 'web-proxy.oa.com', 'port': 8080}
proxy_support = ProxyHandler({"http": "http://%(host)s:%(port)d" % proxy_info})
opener = build_opener(proxy_support)
install_opener(opener)


# Use the crawler to fetch one month of task report data from the bug
# management system. headers is the request header; start_m is the start
# month and end_m is the end month.
def get_month_data(headers, start_m, end_m, project, type):
    if type.find(' ') > 0:
        type = type.replace(' ', '+')
        type = "\"" + type + "\""
    session = requests.Session()
    # The request URL
    url = 'https://issues.apache.org/jira/sr/jira.issueviews:searchrequest-xml/temp/SearchRequest.xml?' \
          'jqlQuery=project+%3D+{project}+AND+issuetype+%3D+{type}+AND+created+%3E' \
          '%3D+{start_m}+AND+created+%3C%3D+{end_m}+' \
          'ORDER+BY+priority+DESC%2C+updated+DESC&tempMax=1000'.format(
              project=project, type=type, start_m=start_m, end_m=end_m)
    # Print the URL to ease debugging
    print(str(url))
def get_http_raw(url=None, retry_count=3, headers=None, request_type='GET',
                 form_data=None, timeout=5, proxy_opener=None):
    """
    The function for retrieving a raw HTML result via HTTP.

    Args:
        url (:obj:`str`): The URL to retrieve (required).
        retry_count (:obj:`int`): The number of times to retry in case
            socket errors, timeouts, connection resets, etc. are
            encountered. Defaults to 3.
        headers (:obj:`dict`): The HTTP headers. The Accept header defaults
            to 'text/html'.
        request_type (:obj:`str`): Request type 'GET' or 'POST'. Defaults
            to 'GET'.
        form_data (:obj:`dict`): Optional form POST data.
        timeout (:obj:`int`): The default timeout for socket connections in
            seconds. Defaults to 5.
        proxy_opener (:obj:`OpenerDirector`): Defaults to None; a new one
            is created inside the function.

    Returns:
        str: The raw data.

    Raises:
        HTTPLookupError: The HTTP lookup failed.
    """
    # Proxy opener.
    if isinstance(proxy_opener, OpenerDirector):
        opener = proxy_opener
    else:
        handler = ProxyHandler()
        opener = build_opener(handler)

    if headers is None:
        headers = {'Accept': 'text/html'}

    enc_form_data = None
    if form_data:
        enc_form_data = urlencode(form_data)
        try:
            # Py 2 inspection will alert on the encoding arg, no harm done.
            enc_form_data = bytes(enc_form_data, encoding='ascii')
        except TypeError as e:  # pragma: no cover
            log.debug(e, exc_info=True)

    try:
        # Create the connection for the HTTP query.
        log.debug('HTTP query at {}'.format(url))
        try:
            # Py 2 inspection alert bypassed by using kwargs dict.
            conn = Request(url=url, data=enc_form_data, headers=headers,
                           **{'method': request_type})
        except TypeError:  # pragma: no cover
            conn = Request(url=url, data=enc_form_data, headers=headers)
        data = opener.open(conn, timeout=timeout)
        try:
            d = data.readall().decode('ascii', 'ignore')
        except AttributeError:  # pragma: no cover
            d = data.read().decode('ascii', 'ignore')
        return str(d)
    except (URLError, socket.timeout, socket.error) as e:
        # Check needed for Python 2.6, also why URLError is caught.
        try:  # pragma: no cover
            if not isinstance(e.reason, (socket.timeout, socket.error)):
                raise HTTPLookupError('HTTP lookup failed for {0}.'.format(url))
        except AttributeError:  # pragma: no cover
            pass
        log.debug('HTTP query socket error: {0}'.format(e))
        if retry_count > 0:
            log.debug('HTTP query retrying (count: {0})'.format(str(retry_count)))
            return get_http_raw(url=url, retry_count=retry_count - 1,
                                headers=headers, request_type=request_type,
                                form_data=form_data, timeout=timeout,
                                proxy_opener=opener)
        else:
            raise HTTPLookupError('HTTP lookup failed for {0}.'.format(url))
    except HTTPLookupError as e:  # pragma: no cover
        raise e
    except Exception:  # pragma: no cover
        raise HTTPLookupError('HTTP lookup failed for {0}.'.format(url))
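# A hedged usage sketch for get_http_raw() above (the URL is illustrative).
# Passing a prebuilt OpenerDirector lets callers reuse one proxy-aware
# opener across calls, which the function itself already does on retry;
# a bare ProxyHandler() picks up proxies from the environment:
#
#   opener = build_opener(ProxyHandler())
#   raw = get_http_raw(url='http://example.com/', proxy_opener=opener)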
def _perform_http_request(self, *, body: Dict[str, any],
                          headers: Dict[str, str]) -> WebhookResponse:
    """Performs an HTTP request and parses the response.

    The target URL comes from self.url (e.g., https://hooks.slack.com/XXX).
    :param body: request body data
    :param headers: complete set of request headers
    :return: API response
    """
    body = json.dumps(body)
    headers["Content-Type"] = "application/json;charset=utf-8"
    if self.logger.level <= logging.DEBUG:
        self.logger.debug(
            f"Sending a request - url: {self.url}, body: {body}, headers: {headers}"
        )
    try:
        url = self.url
        opener: Optional[OpenerDirector] = None
        # for security (BAN-B310)
        if url.lower().startswith("http"):
            req = Request(method="POST", url=url,
                          data=body.encode("utf-8"), headers=headers)
            if self.proxy is not None:
                if isinstance(self.proxy, str):
                    opener = urllib.request.build_opener(
                        ProxyHandler({"http": self.proxy, "https": self.proxy}),
                        HTTPSHandler(context=self.ssl),
                    )
                else:
                    raise SlackRequestError(
                        f"Invalid proxy detected: {self.proxy} must be a str value"
                    )
        else:
            raise SlackRequestError(f"Invalid URL detected: {url}")

        # NOTE: BAN-B310 is already checked above
        resp: Optional[HTTPResponse] = None
        if opener:
            resp = opener.open(req, timeout=self.timeout)  # skipcq: BAN-B310
        else:
            resp = urlopen(  # skipcq: BAN-B310
                req, context=self.ssl, timeout=self.timeout)
        charset: str = resp.headers.get_content_charset() or "utf-8"
        response_body: str = resp.read().decode(charset)
        resp = WebhookResponse(
            url=url,
            status_code=resp.status,
            body=response_body,
            headers=resp.headers,
        )
        _debug_log_response(self.logger, resp)
        return resp
    except HTTPError as e:
        charset = e.headers.get_content_charset() or "utf-8"
        body: str = e.read().decode(charset)  # read the response body here
        resp = WebhookResponse(
            url=url,
            status_code=e.code,
            body=body,
            headers=e.headers,
        )
        if e.code == 429:
            # for backward-compatibility with WebClient (v.2.5.0 or older)
            resp.headers["Retry-After"] = resp.headers["retry-after"]
        _debug_log_response(self.logger, resp)
        return resp
    except Exception as err:
        self.logger.error(f"Failed to send a request to Slack API server: {err}")
        raise err
# -*- coding: UTF-8 -*-
from urllib.error import URLError
from urllib.request import ProxyHandler, build_opener

proxy_handler = ProxyHandler({
    'http': 'http://127.0.0.1:9743',
    'https': 'https://127.0.0.1:9743'
})
opener = build_opener(proxy_handler)
try:
    response = opener.open('https://www.baidu.com')
    print(response.read().decode('utf-8'))
except URLError as e:
    print(e.reason)
def __init__(self, _favorites, _resumepoints):
    ''' Constructor for the ApiHelper class '''
    self._favorites = _favorites
    self._resumepoints = _resumepoints
    self._metadata = Metadata(_favorites, _resumepoints)
    install_opener(build_opener(ProxyHandler(get_proxies())))