def get_page(source):
    """Fetch ``source`` with a GET request and return the streaming response.

    Everything after the URL scheme is percent-encoded with ``quote`` before
    the request is sent.  When ``settings.USING_PROXY`` is truthy the request
    goes through a SOCKS proxy built from ``settings.proxy_type``,
    ``settings.proxy_host`` and ``settings.proxy_port``; otherwise a plain
    ``urllib3.PoolManager`` is used.  Both managers verify certificates
    against the certifi bundle.

    :param source: URL to fetch.
    :returns: the urllib3 response, opened with ``preload_content=False`` so
        the caller is responsible for reading the body.

    On ``urllib3.exceptions.MaxRetryError`` the error is printed and the
    process exits with status 1.
    """
    protocol = urlparse(source)[0] + "://"
    # Strip only the *leading* scheme.  ``str.replace`` would also delete the
    # scheme from URLs embedded later in the path or query, and would miss an
    # upper-case scheme because ``urlparse`` lower-cases it.
    if source.lower().startswith(protocol):
        remainder = source[len(protocol):]
    else:
        remainder = source
    source = protocol + quote(remainder)

    if settings.USING_PROXY:
        http = SOCKSProxyManager(
            # str() lets the port be configured as an int as well as a string.
            settings.proxy_type + "://" + settings.proxy_host + ":"
            + str(settings.proxy_port),
            cert_reqs="CERT_REQUIRED",  # Force certificate check
            ca_certs=certifi.where(),  # Path to the Certifi bundle
        )
    else:
        http = urllib3.PoolManager(
            cert_reqs="CERT_REQUIRED",  # Force certificate check
            ca_certs=certifi.where(),  # Path to the Certifi bundle
        )

    try:
        page = http.urlopen(
            "GET",
            source,
            preload_content=False,
            # timeout=urllib3.Timeout(connect=5.0, read=10.0),
            headers={'User-Agent': 'Mozilla'}
        )
    except urllib3.exceptions.MaxRetryError as error:
        print("Connection error:", error)
        sys.exit(1)
    else:
        return page
def proxy_manager_for(self, proxy, **proxy_kwargs):
    """Return the urllib3 manager for ``proxy``, creating it on first use.

    Managers are cached in ``self.proxy_manager`` keyed by the proxy URL.
    A URL whose lower-cased form starts with ``socks`` gets a
    ``SOCKSProxyManager`` (credentials are taken from the URL with
    ``get_auth_from_url``); any other URL gets the result of
    ``proxy_from_url`` with the headers from ``self.proxy_headers(proxy)``.

    :param proxy: The proxy to return a urllib3 ProxyManager for.
    :param proxy_kwargs: Extra keyword arguments used to configure the Proxy
        Manager.
    :returns: ProxyManager
    :rtype: urllib3.ProxyManager
    """
    if proxy in self.proxy_manager:
        # Cache hit: reuse the manager built for this exact proxy URL.
        return self.proxy_manager[proxy]

    if proxy.lower().startswith('socks'):
        username, password = get_auth_from_url(proxy)
        new_manager = SOCKSProxyManager(
            proxy,
            username=username,
            password=password,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            **proxy_kwargs)
    else:
        headers_for_proxy = self.proxy_headers(proxy)
        new_manager = proxy_from_url(
            proxy,
            proxy_headers=headers_for_proxy,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            **proxy_kwargs)

    self.proxy_manager[proxy] = new_manager
    return new_manager
def initHttp(proxy=None):
    """Create a urllib3 manager, optionally routed through a proxy.

    :param proxy: proxy URL, or a falsy value for a direct connection.
        URLs starting with ``http`` use ``urllib3.ProxyManager``; URLs
        starting with ``sock`` use ``SOCKSProxyManager``.
    :returns: the manager object.  Without a proxy, certificates are
        verified against the certifi bundle when certifi can be imported;
        otherwise a default ``PoolManager`` is returned after a warning.
    :raises Exception: if the SOCKS support module cannot be imported, or
        if the proxy URL has an unrecognised prefix.
    """
    if not proxy:
        try:
            import certifi
        except ImportError:
            # logging.warn() is a deprecated alias (removed in recent Python
            # releases); logging.warning() is the supported spelling.
            logging.warning(
                'certifi ImportError, Please install certifi. "pip install certifi"')
            return urllib3.PoolManager()
        return urllib3.PoolManager(
            cert_reqs='CERT_REQUIRED',
            ca_certs=certifi.where())

    if proxy.startswith('http'):
        return urllib3.ProxyManager(proxy)

    if proxy.startswith('sock'):
        try:
            from urllib3.contrib.socks import SOCKSProxyManager
        except ImportError:
            raise Exception(
                'PROXY setup failed! - ImportError! Please install urllib3[socks]. "pip install urllib3[socks]"')
        return SOCKSProxyManager(proxy)

    raise Exception('Unknown proxy {}'.format(proxy))
def _init_connection(self):
    """Function for initiating connection with remote server

    Builds a urllib3 manager from ``self._connection_properties`` and stores
    its bound ``request`` method on ``self._conn``.  Certificate checking is
    ``CERT_REQUIRED`` only when a truthy ``ca_cert_data`` entry is present;
    that entry's contents are then merged into the connection properties.
    ``self.proxy`` selects a SOCKS manager (URL starts with ``socks``), an
    HTTP proxy manager, or a direct ``PoolManager`` when it is falsy.
    """
    # Always remove the key before the properties are splatted into a
    # manager.  Previously only the no-proxy branch stripped it, so a present
    # but empty 'ca_cert_data' leaked into the proxy managers as a bogus
    # keyword argument.
    ca_cert_data = self._connection_properties.pop('ca_cert_data', None)
    if ca_cert_data:
        LOGGER.info('Using CA cert to confirm identity.')
        cert_reqs = 'CERT_REQUIRED'
        self._connection_properties.update(ca_cert_data)
    else:
        cert_reqs = 'CERT_NONE'

    if self.proxy:
        if self.proxy.startswith('socks'):
            LOGGER.info("Initializing a SOCKS proxy.")
            http = SOCKSProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6,
                                     **self._connection_properties)
        else:
            LOGGER.info("Initializing a HTTP proxy.")
            http = ProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6,
                                **self._connection_properties)
    else:
        LOGGER.info("Initializing no proxy.")
        http = PoolManager(cert_reqs=cert_reqs, maxsize=6,
                           **self._connection_properties)

    self._conn = http.request
def proxy_manager_for(self, proxy, **proxy_kwargs):
    """Return the cached urllib3 manager for ``proxy``, building it if needed.

    Managers are stored in ``self.proxy_manager`` keyed by proxy URL.  URLs
    whose lower-cased form starts with ``socks`` get a ``SOCKSProxyManager``
    with credentials from ``get_auth_from_url``; all others go through
    ``proxy_from_url`` with ``self.proxy_headers(proxy)``.

    :param proxy: proxy URL.
    :param proxy_kwargs: extra keyword arguments forwarded to the manager.
    :returns: the manager for this proxy.
    """
    if proxy in self.proxy_manager:
        return self.proxy_manager[proxy]

    # NOTE(review): both managers are built with assert_hostname=False, i.e.
    # the certificate's host name is not checked - confirm this is intended.
    if proxy.lower().startswith('socks'):
        username, password = get_auth_from_url(proxy)
        new_manager = SOCKSProxyManager(
            proxy,
            username=username,
            password=password,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            assert_hostname=False,
            **proxy_kwargs)
    else:
        headers_for_proxy = self.proxy_headers(proxy)
        new_manager = proxy_from_url(
            proxy,
            proxy_headers=headers_for_proxy,
            num_pools=self._pool_connections,
            maxsize=self._pool_maxsize,
            block=self._pool_block,
            assert_hostname=False,
            **proxy_kwargs)

    self.proxy_manager[proxy] = new_manager
    return new_manager
def get_pool(self, req, use_cache=True):
    """Return the connection pool that should carry ``req``.

    Without a proxy the pre-registered pool stored under
    ``(None, None, bool(req['verify']))`` is returned.  With a proxy, pools
    are keyed by ``(proxy_type, proxy, bool(verify))`` and reused from
    ``self.pools`` unless ``use_cache`` is false, in which case a fresh pool
    is built and not stored.

    :param req: mapping with ``proxy``, ``proxy_type``, ``proxy_auth`` and
        ``verify`` entries.
    :param use_cache: look up / store the pool in ``self.pools``.
    :raises error.IowebConfigError: if a new pool is needed and
        ``proxy_type`` is neither ``socks5`` nor ``http``.
    """
    if not req['proxy']:
        return self.pools[(None, None, bool(req['verify']))]

    # SOCKS5 credentials travel inside the proxy URL; HTTP proxy credentials
    # are sent as proxy headers further down.
    if req['proxy_type'] == 'socks5' and req['proxy_auth']:
        proxy_url = '%s://%s@%s' % (req['proxy_type'], req['proxy_auth'], req['proxy'])
    else:
        proxy_url = '%s://%s' % (req['proxy_type'], req['proxy'])

    pool_key = (req['proxy_type'], req['proxy'], bool(req['verify']))
    if use_cache and pool_key in self.pools:
        return self.pools[pool_key]

    if req['proxy_type'] not in ('socks5', 'http'):
        raise error.IowebConfigError(
            'Invalid value of request option `proxy_type`: %s'
            % req['proxy_type'])

    # Certificate options are only supplied when verification is requested.
    tls_options = {}
    if req['verify']:
        tls_options = {
            'cert_reqs': 'CERT_REQUIRED',
            'ca_certs': certifi.where(),
        }

    if req['proxy_type'] == 'socks5':
        pool = SOCKSProxyManager(proxy_url, **tls_options)
    else:
        if req['proxy_auth']:
            proxy_headers = make_headers(proxy_basic_auth=req['proxy_auth'])
        else:
            proxy_headers = None
        pool = ProxyManager(proxy_url, proxy_headers=proxy_headers, **tls_options)

    if use_cache:
        self.pools[pool_key] = pool
    return pool
def current_proxy(self, reset=False):
    """Return the active connection manager, creating it when required.

    A new manager is built when ``reset`` is true or ``self.proxy`` is still
    ``None``: a direct ``urllib3.PoolManager`` if ``self.proxy_disabled`` is
    set, otherwise a ``SOCKSProxyManager`` for ``self.proxy_string``.

    :param reset: force a new manager even if one already exists.
    :returns: the manager stored in ``self.proxy``.
    """
    if not reset and self.proxy is not None:
        return self.proxy

    if self.proxy_disabled:
        self.proxy = urllib3.PoolManager()
    else:
        print(f"Using proxy: {self.proxy_string}")
        self.proxy = SOCKSProxyManager(self.proxy_string)
    return self.proxy
def check_proxy(proxy, proxy_type):
    """Probe ``proxy`` by downloading Wikipedia's robots.txt through it.

    :param proxy: proxy address without scheme; it is prefixed with
        ``socks5://`` when ``proxy_type`` is ``'socks'`` and with ``http://``
        otherwise.
    :param proxy_type: ``'socks'`` selects a SOCKS manager, anything else an
        HTTP proxy manager.
    :returns: dict with ``status`` (``ok``, ``data_fail``, ``connect_fail``
        or ``read_fail``), ``connect_time``, ``read_time`` (seconds, rounded
        to two decimals) and ``error`` (exception class name or ``None``).
    :raises Exception: for any failure whose class name is not one of the
        recognised connect/read errors.
    """
    if proxy_type == 'socks':
        pool = SOCKSProxyManager('socks5://%s' % proxy)
    else:
        pool = urllib3.ProxyManager('http://%s' % proxy)

    retries = urllib3.Retry(total=None, connect=False, read=False,
                            redirect=10, raise_on_redirect=False)
    timeout = urllib3.Timeout(connect=CONNECT_TIMEOUT, read=READ_TIMEOUT)

    report = {
        'status': None,
        'connect_time': None,
        'read_time': None,
        'error': None,
    }
    # Failures are classified by exception class name.
    failure_status = {
        'NewConnectionError': 'connect_fail',
        'ConnectTimeoutError': 'connect_fail',
        'ProtocolError': 'read_fail',
        'ReadTimeoutError': 'read_fail',
    }

    try:
        started = time.time()
        res = pool.request('GET', 'https://en.wikipedia.org/robots.txt',
                           retries=retries, timeout=timeout,
                           preload_content=False)
        connected = time.time()
        report['connect_time'] = round(connected - started, 2)
        data = res.read()
        report['read_time'] = round(time.time() - connected, 2)
    except Exception as ex:
        error_name = type(ex).__name__
        report['error'] = error_name
        if error_name not in failure_status:
            raise Exception('Unexpected error: %s' % error_name)
        report['status'] = failure_status[error_name]
        return report

    # The body must contain this marker for the proxy to count as working.
    if b'# Wikipedia work bots:' in data:
        report['status'] = 'ok'
    else:
        report['status'] = 'data_fail'
    return report
def custom_proxy(self, proxy_string):
    """Replace the active connection manager and reset the usage counter.

    :param proxy_string: SOCKS proxy URL; ignored when
        ``self.proxy_disabled`` is set, in which case a direct
        ``urllib3.PoolManager`` is installed instead.
    :returns: the new manager, also stored in ``self.proxy``.
    """
    if not self.proxy_disabled:
        print(f"Using proxy: {proxy_string}")
        manager = SOCKSProxyManager(proxy_string)
    else:
        manager = urllib3.PoolManager()
    self.proxy = manager
    self.proxy_count = 0
    return self.proxy
def get_tor_session(self):
    """
    Build a connection manager that goes through a SOCKS5 proxy.

    The proxy address is taken from ``self.ip`` and ``self.socksport``.
    Certificates are verified against the certifi bundle and a fixed browser
    User-Agent is installed as the default header.

    :return: urllib3.SOCKSProxyManager object.
    """
    proxy_url = 'socks5://{0}:{1}'.format(self.ip, self.socksport)
    default_headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
    }
    return SOCKSProxyManager(
        proxy_url,
        cert_reqs='CERT_REQUIRED',
        ca_certs=certifi.where(),
        headers=default_headers,
    )
def set_telepot_socks_proxy(url, username=None, password=None):
    """Point telepot's connection pools at a SOCKS proxy.

    Overwrites ``telepot.api._onetime_pool_spec`` and the ``'default'``
    entry of ``telepot.api._pools``.

    :param url: SOCKS proxy URL.
    :param username: optional proxy user name.
    :param password: optional proxy password.
    """
    # from https://github.com/nickoala/telepot/pull/386
    onetime_kwargs = dict(
        proxy_url=url,
        username=username,
        password=password,
        **_onetime_pool_params
    )
    telepot.api._onetime_pool_spec = (SOCKSProxyManager, onetime_kwargs)

    default_pool = SOCKSProxyManager(
        url,
        username=username,
        password=password,
        **_default_pool_params
    )
    telepot.api._pools['default'] = default_pool
def __init__(self, con_pool_size=1, proxy_url=None, urllib3_proxy_kwargs=None,
             connect_timeout=5., read_timeout=5.):
    """Create the urllib3 connection pool and store it on ``self._con_pool``.

    :param con_pool_size: ``maxsize`` passed to the urllib3 manager.
    :param proxy_url: proxy URL; when falsy, the ``HTTPS_PROXY`` and then
        ``https_proxy`` environment variables are consulted.
    :param urllib3_proxy_kwargs: extra keyword arguments merged into the
        manager options when a proxy is used.
    :param connect_timeout: connect timeout, also kept on
        ``self._connect_timeout``.
    :param read_timeout: read timeout.
    :raises RuntimeError: if a SOCKS proxy is requested but
        ``urllib3.contrib.socks`` cannot be imported.
    """
    if urllib3_proxy_kwargs is None:
        urllib3_proxy_kwargs = {}

    self._connect_timeout = connect_timeout

    keepalive_option = (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
    pool_options = {
        'maxsize': con_pool_size,
        'cert_reqs': 'CERT_REQUIRED',
        'ca_certs': certifi.where(),
        'socket_options': HTTPConnection.default_socket_options + [keepalive_option],
        'timeout': urllib3.Timeout(
            connect=self._connect_timeout, read=read_timeout, total=None),
    }

    # Set a proxy according to the following order:
    # * proxy defined in proxy_url (+ urllib3_proxy_kwargs)
    # * proxy set in `HTTPS_PROXY` env. var.
    # * proxy set in `https_proxy` env. var.
    # * None (if no proxy is configured)
    proxy_url = (proxy_url
                 or os.environ.get('HTTPS_PROXY')
                 or os.environ.get('https_proxy'))

    if proxy_url:
        pool_options.update(urllib3_proxy_kwargs)
        if proxy_url.startswith('socks'):
            try:
                from urllib3.contrib.socks import SOCKSProxyManager
            except ImportError:
                raise RuntimeError('PySocks is missing')
            manager = SOCKSProxyManager(proxy_url, **pool_options)
        else:
            manager = urllib3.proxy_from_url(proxy_url, **pool_options)
            if manager.proxy.auth:
                # TODO: what about other auth types?
                manager.proxy_headers.update(
                    urllib3.make_headers(proxy_basic_auth=manager.proxy.auth))
    elif urllib3.contrib.appengine.is_appengine_sandbox():
        # Use URLFetch service if running in App Engine
        manager = urllib3.contrib.appengine.AppEngineManager()
    else:
        manager = urllib3.PoolManager(**pool_options)

    self._con_pool = manager
def pageRequest(url):
    """Download ``url`` and return the response body.

    Calls alternate on the module-level ``roundRobin`` counter: odd values
    always use a direct connection, even values go through the SOCKS5 proxy
    on ``localhost:torPort`` when ``torSupport`` is truthy.  The counter is
    incremented after every request and ``newTorIdentity()`` is called each
    time it reaches a multiple of 60.

    :param url: address to fetch with GET.
    :returns: ``response.data`` of the urllib3 response.
    """
    global roundRobin

    use_tor = not roundRobin % 2 and torSupport
    manager_options = dict(
        cert_reqs='CERT_REQUIRED',
        ca_certs=certifi.where(),
        headers={'user-agent': randomUserAgent(), 'Cookie': ''},
    )
    # Build only the manager this call needs; previously both a SOCKS manager
    # and a PoolManager were created on every call and one was thrown away.
    if use_tor:
        manager = SOCKSProxyManager('socks5://localhost:' + str(torPort),
                                    **manager_options)
    else:
        manager = urllib3.PoolManager(1, **manager_options)

    try:
        response = manager.request('GET', url)
        body = response.data
    finally:
        # The manager is single-use, so release its sockets right away.
        manager.clear()

    roundRobin += 1
    if not roundRobin % 60:
        newTorIdentity()
    return body
def set_telepot_socks_proxy(url, username=None, password=None):
    """Point telepot's connection pools at a SOCKS proxy.

    Overwrites ``tp.api._onetime_pool_spec`` and the ``'default'`` entry of
    ``tp.api._pools``, reusing telepot's own pool parameters.

    :param url: SOCKS proxy URL.
    :param username: optional proxy user name.
    :param password: optional proxy password.
    """
    from urllib3.contrib.socks import SOCKSProxyManager
    from telepot.api import _default_pool_params, _onetime_pool_params

    onetime_kwargs = dict(
        proxy_url=url,
        username=username,
        password=password,
        **_onetime_pool_params
    )
    tp.api._onetime_pool_spec = (SOCKSProxyManager, onetime_kwargs)

    default_pool = SOCKSProxyManager(
        url,
        username=username,
        password=password,
        **_default_pool_params
    )
    tp.api._pools['default'] = default_pool
def __init__(self, handler, range_start, range_end):
    """Set up per-request state and, on first use, the shared urllib3 manager.

    :param handler: request handler; its ``wfile.write``, ``url``,
        ``url_parts`` and ``headers`` are copied onto this object.
    :param range_start: first byte offset of the requested range.
    :param range_end: last byte offset of the requested range.

    The manager is created only while ``self.http`` is ``None`` and is then
    stored on the class, so later instances reuse it.
    """
    self.handler = handler
    self.write = handler.wfile.write
    self.url = handler.url
    url_parts = handler.url_parts
    self.scheme = url_parts.scheme
    self.netloc = url_parts.netloc

    # Title-case the incoming header names, then apply the class-level
    # overrides and force the Host header.
    self.headers = {name.title(): value for name, value in handler.headers.items()}
    self.headers.update(self._headers)
    self.headers['Host'] = self.netloc

    self.range_start = range_start
    self.range_end = range_end
    self.delay_cache_size = self.max_size * self.threads * 4
    self.delay_star_size = self.delay_cache_size * 2
    self.max_threads = min(self.threads * 2, self.pool_size)

    if self.http is None:
        connection_pool_kw = {
            'block': True,
            'timeout': self.timeout,
            'maxsize': self.pool_size,
            'ca_certs': self.ca_certs[-1]  # only allow one path
        }
        if not self.proxy:
            manager = urllib3.PoolManager(**connection_pool_kw)
        elif self.proxy.lower().startswith('socks'):
            from urllib3.contrib.socks import SOCKSProxyManager
            manager = SOCKSProxyManager(self.proxy, **connection_pool_kw)
            # Sets the SOCKS 'rdns' option on the manager's pool kwargs.
            manager.connection_pool_kw['_socks_options']['rdns'] = True
        else:
            manager = urllib3.ProxyManager(self.proxy, **connection_pool_kw)
        self.__class__.http = manager

    self.firstrange = range_start, range_start + self.first_size - 1
    self.data_queue = queue.PriorityQueue()
    self.range_queue = queue.LifoQueue()
    self._started_threads = {}
def __init__(self, url, user_agent, cookies_string=False, custom_header=False,
             insecure_ssl='false', proxy=False):
    """Prepare request headers and the urllib3 manager used for requests.

    :param url: target URL, stored for later use.
    :param user_agent: User-Agent value; ``'default'`` selects the class
        default.
    :param cookies_string: optional raw ``Cookie`` header value.
    :param custom_header: optional value handed to the custom-header parser.
    :param insecure_ssl: the string ``'false'`` enables certificate
        verification; any other value disables it.
    :param proxy: optional proxy URL.  ``http``/``https`` schemes use
        ``urllib3.ProxyManager``; every other scheme uses
        ``SOCKSProxyManager``.
    """
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    self.__url = url

    if user_agent == 'default':
        agent = self.__default_user_agent
    else:
        agent = user_agent
    self.__headers = {'User-Agent': agent}
    if cookies_string:
        self.__headers['Cookie'] = cookies_string
    if custom_header:
        self.__parse_custom_header(custom_header)

    if insecure_ssl == 'false':
        self.__verify = 'CERT_REQUIRED'
    else:
        self.__verify = 'CERT_NONE'

    # NOTE(review): every manager is pinned to ssl.PROTOCOL_TLSv1 - confirm
    # this is still wanted.
    manager_options = dict(
        ssl_version=ssl.PROTOCOL_TLSv1,
        timeout=self.__request_timeout,
        cert_reqs=self.__verify,
    )
    if not proxy:
        self.__request_obj = urllib3.PoolManager(**manager_options)
    elif proxy.split('://')[0] in ('http', 'https'):
        self.__request_obj = urllib3.ProxyManager(proxy, **manager_options)
    else:
        self.__request_obj = SOCKSProxyManager(proxy, **manager_options)
class TorProxy:
    """HTTP client that sends requests through a local SOCKS5 proxy.

    Requests go through ``socks5://127.0.0.1:9050``; before each request a
    ``NEWNYM`` signal is sent to the controller on port 9051.
    """

    def __init__(self):
        """Create the SOCKS manager used for every request."""
        self.proxy = SOCKSProxyManager('socks5://127.0.0.1:9050')

    def new_ip(self):
        """Authenticate to the controller and send it the NEWNYM signal."""
        with Controller.from_port(port=9051) as tor_control:
            tor_control.authenticate(password="******")
            tor_control.signal(Signal.NEWNYM)
            tor_control.close()

    def request(self, url):
        """GET ``url`` with a random User-Agent after calling ``new_ip()``.

        :param url: address to fetch.
        :returns: the urllib3 response object.
        """
        self.new_ip()
        random_agent = {'User-Agent': UserAgent().random}
        return self.proxy.request('GET', url, headers=random_agent)
def _get_connection_manager(self):
    """Build the urllib3 manager matching the configured proxy and CA bundle.

    ``self._timeout`` is always applied.  When ``self._ca_certs`` is set,
    certificate verification is required against that bundle.  A proxy URL
    whose lower-cased form starts with ``sock`` yields a
    ``SOCKSProxyManager``, any other proxy URL a ``urllib3.ProxyManager``,
    and no proxy a ``urllib3.PoolManager``.
    """
    manager_options = {'timeout': self._timeout}
    if self._ca_certs:
        manager_options.update(
            cert_reqs='CERT_REQUIRED',
            ca_certs=self._ca_certs,
        )

    if not self._proxy_url:
        return urllib3.PoolManager(**manager_options)

    if self._proxy_url.lower().startswith('sock'):
        from urllib3.contrib.socks import SOCKSProxyManager
        return SOCKSProxyManager(self._proxy_url, **manager_options)

    return urllib3.ProxyManager(self._proxy_url, **manager_options)
def setup_network(core, prefs):
    """Install urllib3-based connection pools on ``core.networking``.

    A direct ``PoolManager`` is always created as ``core.networking.pool``.
    When ``prefs['proxy_adr']`` is set and starts with ``http``, a second
    manager is stored as ``core.networking.pool_proxy``: a
    ``SOCKSProxyManager`` if ``prefs['proxy_type']`` is ``'SOCK5'``,
    otherwise a ``urllib3.ProxyManager``.  Both managers use the Plex
    connection-pool classes, and ``core.networking.http_request`` is rebound
    to ``urllib3_http_request``.

    :param core: object exposing a ``networking`` attribute.
    :param prefs: mapping with ``proxy_adr`` and ``proxy_type`` entries.
    """
    retries = urllib3.Retry(backoff_factor=2, status_forcelist={500})
    pool_classes = {
        'http': PlexHTTPConnectionPool,
        'https': PlexHTTPSConnectionPool,
    }

    networking = core.networking
    networking.pool = urllib3.PoolManager(retries=retries, maxsize=5)
    networking.pool.pool_classes_by_scheme = pool_classes

    proxy_address = prefs['proxy_adr']
    if proxy_address and proxy_address.startswith('http'):
        if prefs['proxy_type'] == 'SOCK5':
            manager_class = SOCKSProxyManager
        else:
            manager_class = urllib3.ProxyManager
        networking.pool_proxy = manager_class(proxy_address, retries=retries, maxsize=5)
        # Share the same scheme -> pool-class mapping as the direct pool.
        networking.pool_proxy.pool_classes_by_scheme = networking.pool.pool_classes_by_scheme

    networking.http_request = MethodType(urllib3_http_request, networking)
def get_pool_manager(proxy=None):
    """Return a urllib3 manager with ten pools for the given proxy setting.

    :param proxy: ``None`` for a direct ``PoolManager``; a URL starting with
        ``socks`` for a ``SOCKSProxyManager``; any other URL for a
        ``urllib3.ProxyManager``.
    :returns: the manager instance.
    """
    if proxy is None:
        return urllib3.PoolManager(num_pools=10)

    if proxy.startswith("socks"):
        from urllib3.contrib.socks import SOCKSProxyManager
        return SOCKSProxyManager(proxy, num_pools=10)

    return urllib3.ProxyManager(proxy, num_pools=10)
def get_pool_manager(proxy=None, ssl_verify=True):
    """Return a urllib3 manager with ten pools for the given proxy setting.

    :param proxy: ``None`` for a direct ``PoolManager``; a URL starting with
        ``socks`` for a ``SOCKSProxyManager``; any other URL for a
        ``urllib3.ProxyManager``.
    :param ssl_verify: pass ``False`` to disable certificate verification.
        Any other value leaves urllib3's default behaviour in place.
    :returns: the manager instance.
    """
    pool_kwargs = {"num_pools": 10}
    if ssl_verify is False:
        # Applied to every manager type: previously the flag was honoured
        # only without a proxy and silently ignored otherwise.
        pool_kwargs["cert_reqs"] = ssl.CERT_NONE

    if proxy is None:
        return urllib3.PoolManager(**pool_kwargs)

    if proxy.startswith("socks"):
        from urllib3.contrib.socks import SOCKSProxyManager
        return SOCKSProxyManager(proxy, **pool_kwargs)

    return urllib3.ProxyManager(proxy, **pool_kwargs)
def _get_connection_manager(self):
    """Build the urllib3 manager matching the configured proxy and CA bundle.

    The timeout comes from ``self.get_timeout()``.  When ``self._ca_certs``
    is set, certificate verification is required against that bundle.  A
    proxy URL whose lower-cased form starts with ``sock`` yields a
    ``SOCKSProxyManager``.  For other proxies, if
    ``self._identify_http_proxy_auth()`` is truthy the URL and credentials
    are replaced by the result of ``self._seperate_http_proxy_auth()`` and
    the credentials are sent as basic-auth proxy headers.  Without a proxy a
    plain ``urllib3.PoolManager`` is returned.
    """
    manager_options = {'timeout': self.get_timeout()}
    if self._ca_certs:
        manager_options.update(
            cert_reqs='CERT_REQUIRED',
            ca_certs=self._ca_certs,
        )

    if not self._proxy_url:
        return urllib3.PoolManager(**manager_options)

    if self._proxy_url.lower().startswith('sock'):
        from urllib3.contrib.socks import SOCKSProxyManager
        return SOCKSProxyManager(self._proxy_url, **manager_options)

    if self._identify_http_proxy_auth():
        # Note: this overwrites self._proxy_url with the value returned by
        # the helper.
        self._proxy_url, self._basic_proxy_auth = self._seperate_http_proxy_auth()
        manager_options['proxy_headers'] = urllib3.make_headers(
            proxy_basic_auth=self._basic_proxy_auth)

    return urllib3.ProxyManager(self._proxy_url, **manager_options)
def getConnectionPool():
    """Create the urllib3 manager selected by the add-on's proxy settings.

    Reads ``proxyType`` and ``proxyAddress`` from
    ``config.conf["onlineOCRGeneral"]``.  ``"http"`` selects a
    ``urllib3.ProxyManager``, ``"socks"`` a ``SOCKSProxyManager`` and any
    other value a direct ``urllib3.PoolManager``; all use 10-second connect
    and read timeouts.  The choice is written to the log with ``log.io``.

    :returns: the manager instance.
    """
    ocr_settings = config.conf["onlineOCRGeneral"]
    proxyType = ocr_settings["proxyType"]
    proxyAddress = ocr_settings["proxyAddress"]
    msg = u"type:\n{0}\naddress:\n{1}".format(proxyType, proxyAddress)

    # cert_reqs='CERT_REQUIRED' / ca_certs=certifi.where() are deliberately
    # not passed: they are commented out for every manager type.
    timeout = urllib3.Timeout(connect=10, read=10)
    if proxyType == u"http":
        pool = urllib3.ProxyManager(proxyAddress, timeout=timeout)
        summary = u"\nHTTP proxy\n{0}"
    elif proxyType == u"socks":
        pool = SOCKSProxyManager(proxyAddress, timeout=timeout)
        summary = u"\nSocks proxy\n{0}"
    else:
        pool = urllib3.PoolManager(timeout=timeout)
        summary = u"\nNo proxy\n{0}"

    msg += summary.format(pool)
    log.io(msg)
    return pool
def __init_connection(self, url=None, proxy=False):
    """Function for initiating connection with remote server

    Stores the bound ``request`` method of a new urllib3 manager on
    ``self._conn``.  Every manager is created with ``cert_reqs='CERT_NONE'``.

    :param url: The URL of the remote system; the stored URL is kept when
        this is falsy.
    :type url: str
    :param proxy: when truthy and ``self.get_proxy()`` returns a value, a
        SOCKS manager (proxy starts with ``socks``) or an HTTP proxy manager
        is used instead of a direct one.
    """
    self.__url = url or self.__url

    if not (self.get_proxy() and proxy):
        LOGGER.info("Initializing no proxy.")
        manager = urllib3.PoolManager(cert_reqs='CERT_NONE')
    elif self.get_proxy().startswith('socks'):
        LOGGER.info("Initializing a SOCKS proxy.")
        manager = SOCKSProxyManager(self.get_proxy(), cert_reqs='CERT_NONE')
    else:
        LOGGER.info("Initializing a HTTP proxy.")
        manager = ProxyManager(self.get_proxy(), cert_reqs='CERT_NONE')

    self._conn = manager.request
def get_pool_manager(proxy=None, ssl_verify=True):
    """Return a urllib3 manager with ten pools for the given proxy setting.

    :param proxy: ``None`` for a direct ``PoolManager``; a URL starting with
        ``socks`` for a ``SOCKSProxyManager``; any other URL for a
        ``urllib3.ProxyManager``.
    :param ssl_verify: certificate verification is disabled only when this
        is exactly ``False``; ``True`` or ``None`` require a valid
        certificate.
    :returns: the manager instance.
    """
    # CERT_REQUIRED if ssl_verify is True or None (undefined) << default behaviour
    # CERT_NONE only if ssl_verify is False
    cert_reqs = ssl.CERT_NONE if ssl_verify is False else ssl.CERT_REQUIRED

    # The same verification mode is applied to all three manager types;
    # previously it was set only when no proxy was configured, so ssl_verify
    # had no effect for proxied connections.
    if proxy is None:
        return urllib3.PoolManager(num_pools=10, cert_reqs=cert_reqs)

    if proxy.startswith("socks"):
        from urllib3.contrib.socks import SOCKSProxyManager
        return SOCKSProxyManager(proxy, num_pools=10, cert_reqs=cert_reqs)

    return urllib3.ProxyManager(proxy, num_pools=10, cert_reqs=cert_reqs)
def connect():
    """Create the urllib3 object used to reach ``args.uri["uri"]``.

    Reads the module-level ``args``.  Without ``args.proxy`` a direct
    connection pool for the target URL is returned.  With an ``http://``
    proxy, a pool for the target URL routed through that proxy is returned;
    ``args.username`` / ``args.password`` are sent as basic proxy
    credentials when a user name is given.  With a ``socks4://`` or
    ``socks5://`` proxy a ``SOCKSProxyManager`` is returned (a manager, not
    a single pool).

    Any other proxy value logs an error and exits with status 1.
    """
    if not args.proxy:
        return urllib3.connection_from_url(args.uri["uri"])

    # partition() never raises, so a value without "://" (or with several)
    # reaches the error message below instead of failing with ValueError.
    proto, separator, _ = args.proxy.partition("://")

    if separator and proto == "http":
        proxy_headers = None
        if args.username is not None:
            # Only send credentials that were actually supplied; formatting
            # None values would transmit the literal string "None:None".
            proxy_headers = urllib3.make_headers(
                proxy_basic_auth='%s:%s' % (args.username, args.password),
            )
        # Credentials belong in proxy_headers so they are sent to the proxy
        # itself rather than as default headers on every request.
        http_proxy = urllib3.ProxyManager(
            args.proxy,
            proxy_headers=proxy_headers
        )
        return http_proxy.connection_from_url(args.uri["uri"])

    if separator and proto in ("socks4", "socks5"):
        return SOCKSProxyManager(args.proxy)

    logging.error("Invalid proxy protocol. It must start with 'http://' or "
                  "'socks[45]://'.")
    raise SystemExit(1)
def _init_connection(self):
    """Function for initiating connection with remote server

    Certificate checking is ``CERT_REQUIRED`` when the connection properties
    contain a truthy ``ca_certs`` entry and ``CERT_NONE`` otherwise.
    ``self.proxy`` selects a SOCKS manager (URL starts with ``socks``), an
    HTTP proxy manager, or a direct ``PoolManager`` when it is falsy.  The
    manager's bound ``request`` method is stored on ``self._conn``.
    """
    if not self._connection_properties.get('ca_certs', None):
        LOGGER.info('Not using CA certificate.')
        cert_reqs = 'CERT_NONE'
    else:
        LOGGER.info('Using CA cert to confirm identity.')
        cert_reqs = 'CERT_REQUIRED'

    if not self.proxy:
        LOGGER.info("Initializing no proxy.")
        manager = PoolManager(cert_reqs=cert_reqs, maxsize=6,
                              **self._connection_properties)
    elif self.proxy.startswith('socks'):
        LOGGER.info("Initializing a SOCKS proxy.")
        manager = SOCKSProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6,
                                    **self._connection_properties)
    else:
        LOGGER.info("Initializing a HTTP proxy.")
        manager = ProxyManager(self.proxy, cert_reqs=cert_reqs, maxsize=6,
                               **self._connection_properties)

    self._conn = manager.request
def _default_make_pool(http, proxy_info):
    """Create the urllib3 manager for ``http``, honouring ``proxy_info``.

    If ``http.ca_certs`` is unset it is filled in from
    ``_certifi_where_for_ssl_version()``.  ``cert_reqs`` is
    ``'CERT_REQUIRED'`` when a CA bundle is available and
    ``http.disable_ssl_certificate_validation`` is falsy; otherwise ``None``
    is passed.

    :param http: object providing ``ca_certs`` and
        ``disable_ssl_certificate_validation``; ``ca_certs`` may be set as a
        side effect.
    :param proxy_info: falsy for a direct connection, a proxy-info object,
        or a zero-argument callable returning one.
    :returns: a ``SOCKSProxyManager`` for SOCKS4/SOCKS5 proxies, a
        ``urllib3.ProxyManager`` for any other proxy type, or a
        ``urllib3.PoolManager`` without a proxy.
    """
    if not http.ca_certs:
        http.ca_certs = _certifi_where_for_ssl_version()

    ssl_disabled = http.disable_ssl_certificate_validation
    cert_reqs = 'CERT_REQUIRED' if http.ca_certs and not ssl_disabled else None

    # callable() replaces isinstance(..., collections.Callable): that alias
    # was removed in Python 3.10 and raised AttributeError there.
    if callable(proxy_info):
        proxy_info = proxy_info()

    if not proxy_info:
        return urllib3.PoolManager(
            ca_certs=http.ca_certs,
            cert_reqs=cert_reqs,
        )

    if proxy_info.proxy_type in (socks.PROXY_TYPE_SOCKS4, socks.PROXY_TYPE_SOCKS5):
        if proxy_info.proxy_type == socks.PROXY_TYPE_SOCKS4:
            proxy_scheme = 'socks4'
        else:
            proxy_scheme = 'socks5'
        proxy_url = '{}://{}:{}'.format(
            proxy_scheme, proxy_info.proxy_host, proxy_info.proxy_port)
        # Empty credentials / headers are normalised to None.
        username = proxy_info.proxy_user if proxy_info.proxy_user else None
        password = proxy_info.proxy_pass if proxy_info.proxy_pass else None
        headers = proxy_info.proxy_headers if proxy_info.proxy_headers else None
        return SOCKSProxyManager(
            proxy_url=proxy_url,
            username=username,
            password=password,
            headers=headers,
            ca_certs=http.ca_certs,
            cert_reqs=cert_reqs,
        )

    if proxy_info.proxy_user and proxy_info.proxy_pass:
        proxy_url = 'http://{}:{}@{}:{}/'.format(
            proxy_info.proxy_user, proxy_info.proxy_pass,
            proxy_info.proxy_host, proxy_info.proxy_port,
        )
        proxy_headers = urllib3.util.request.make_headers(
            proxy_basic_auth='{}:{}'.format(
                proxy_info.proxy_user, proxy_info.proxy_pass,
            ))
    else:
        proxy_url = 'http://{}:{}/'.format(
            proxy_info.proxy_host, proxy_info.proxy_port,
        )
        proxy_headers = {}

    return urllib3.ProxyManager(
        proxy_url=proxy_url,
        proxy_headers=proxy_headers,
        ca_certs=http.ca_certs,
        cert_reqs=cert_reqs,
    )
def request(self):
    """Send ``self._request`` through urllib3 and keep the raw response.

    With a proxy configured a fresh proxy manager is built for this request
    (SOCKS for ``proxy_type == 'socks5'``, HTTP proxy otherwise), both
    verifying certificates against the certifi bundle; without one
    ``self.pool`` is used.  urllib3 retries and redirects are switched off.
    The response is stored unread on ``self._response``.

    :raises error.GrabTimeoutError: on a urllib3 read timeout.
    :raises error.GrabConnectionError: on an invalid (non-encodable) URL,
        connect timeout, protocol error or SSL error.
    """
    req = self._request

    if not req.proxy:
        pool = self.pool
    else:
        if req.proxy_userpwd:
            auth_headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
        else:
            auth_headers = None
        proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
        if req.proxy_type == 'socks5':
            # The auth headers are not passed to the SOCKS manager.
            pool = SOCKSProxyManager(
                proxy_url,
                cert_reqs='CERT_REQUIRED',
                ca_certs=certifi.where(),
            )
        else:
            pool = ProxyManager(
                proxy_url,
                proxy_headers=auth_headers,
                cert_reqs='CERT_REQUIRED',
                ca_certs=certifi.where(),
            )

    try:
        # Retries can be disabled by passing False:
        # http://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#module-urllib3.util.retry
        # Do not use False because of warning:
        # Converted retries value: False -> Retry(total=False,
        # connect=None, read=None, redirect=0, status=None)
        no_retry = Retry(
            total=False,
            connect=False,
            read=False,
            redirect=0,
            status=None,
        )
        # The read timeout is not total response time timeout
        # It is the timeout on read of next data chunk from the server
        # Total response timeout is handled by Grab
        io_timeout = Timeout(connect=req.connect_timeout, read=req.timeout)

        if six.PY3:
            target_url, http_method = make_unicode(req.url), make_unicode(req.method)
        else:
            target_url, http_method = make_str(req.url), req.method

        req.op_started = time.time()
        try:
            res = pool.urlopen(
                http_method,
                target_url,
                body=req.data,
                timeout=io_timeout,
                retries=no_retry,
                headers=req.headers,
                preload_content=False,
            )
        except UnicodeError as ex:
            raise error.GrabConnectionError('GrabInvalidUrl', ex)
    except exceptions.ReadTimeoutError as ex:
        raise error.GrabTimeoutError('ReadTimeoutError', ex)
    except exceptions.ConnectTimeoutError as ex:
        raise error.GrabConnectionError('ConnectTimeoutError', ex)
    except exceptions.ProtocolError as ex:
        # TODO:
        # the code
        # raise error.GrabConnectionError(ex.args[1][0], ex.args[1][1])
        # fails
        # with error TypeError: 'OSError' object is not subscriptable
        raise error.GrabConnectionError('ProtocolError', ex)
    except exceptions.SSLError as ex:
        raise error.GrabConnectionError('SSLError', ex)

    # WTF?
    self.request_head = b''
    self.request_body = b''
    self.request_log = b''

    self._response = res
"""browse the world wide web and multimedia""" import os from urllib.parse import urlencode, quote_plus, unquote from urllib3.contrib.socks import SOCKSProxyManager import urllib3 import certifi SOCKS_PROXY = os.environ["SOCKS_PROXY"] HTTP = urllib3.PoolManager(num_pools=50, cert_reqs="CERT_REQUIRED", ca_certs=certifi.where()) PROXY = SOCKSProxyManager(SOCKS_PROXY, num_pools=50, cert_reqs="CERT_REQUIRED", ca_certs=certifi.where()) def url_encode(value): """url encode value using urllib.parse""" return urlencode(value, quote_via=quote_plus) def url_decode(value): """url decode value using urllib.parse""" return unquote(value) def http_validate(response, valid_status=200): """validate http response""" if response.status == valid_status:
def request(self):
    """Send ``self._request`` through urllib3 and keep the raw response.

    With a proxy configured a fresh proxy manager is built for this request
    (SOCKS for ``proxy_type == 'socks5'``, HTTP proxy otherwise), both
    verifying certificates against the certifi bundle; without one
    ``self.pool`` is used.  urllib3 retries and redirects are switched off.
    The request runs inside ``self.wrap_transport_error()`` and the response
    is stored unread on ``self._response``.

    :raises error.GrabConnectionError: if the URL cannot be encoded.
    """
    req = self._request

    if not req.proxy:
        pool = self.pool
    else:
        if req.proxy_userpwd:
            auth_headers = make_headers(proxy_basic_auth=req.proxy_userpwd)
        else:
            auth_headers = None
        proxy_url = '%s://%s' % (req.proxy_type, req.proxy)
        if req.proxy_type == 'socks5':
            # The auth headers are not passed to the SOCKS manager.
            pool = SOCKSProxyManager(
                proxy_url,
                cert_reqs='CERT_REQUIRED',
                ca_certs=certifi.where(),
            )
        else:
            pool = ProxyManager(
                proxy_url,
                proxy_headers=auth_headers,
                cert_reqs='CERT_REQUIRED',
                ca_certs=certifi.where(),
            )

    with self.wrap_transport_error():
        # Retries can be disabled by passing False:
        # http://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html#module-urllib3.util.retry
        # Do not use False because of warning:
        # Converted retries value: False -> Retry(total=False,
        # connect=None, read=None, redirect=0, status=None)
        no_retry = Retry(
            total=False,
            connect=False,
            read=False,
            redirect=0,
            status=None,
        )
        # The read timeout is not total response time timeout
        # It is the timeout on read of next data chunk from the server
        # Total response timeout is handled by Grab
        io_timeout = Timeout(connect=req.connect_timeout, read=req.timeout)

        if six.PY3:
            target_url, http_method = make_unicode(req.url), make_unicode(req.method)
        else:
            target_url, http_method = make_str(req.url), req.method

        req.op_started = time.time()
        try:
            res = pool.urlopen(
                http_method,
                target_url,
                body=req.data,
                timeout=io_timeout,
                retries=no_retry,
                headers=req.headers,
                preload_content=False,
            )
        except UnicodeError as ex:
            raise error.GrabConnectionError('GrabInvalidUrl', ex)
        # The explicit handlers for urllib3's ReadTimeoutError,
        # ConnectTimeoutError, ProtocolError and SSLError are disabled here;
        # presumably wrap_transport_error() translates them - verify.

    # WTF?
    self.request_head = b''
    self.request_body = b''
    self.request_log = b''

    self._response = res
import requests import time import re import os # import MongoDB from PIL import Image from urllib3.contrib.socks import SOCKSProxyManager #just a test url = "https://game.nihaoma.top/t1/?template=blue&token=b595fa57&CateID=10#/" http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where()) ######## 访问某些网站使用本地ssr代理 proxy = urllib3.ProxyManager('http://127.0.0.1:1087', 'https://127.0.0.1:1087') sockproxy = SOCKSProxyManager('socks5://localhost:1086') ''' 通过ssr代理访问 端口1080 无Referer ''' # def visitByProxy(url): # try: # web = proxy.request('GET', url, # headers={ # 'User-Agent': # 'ozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1.1 Safari/605.1.15' # , 'accept-language': "zh-CN,zh;q=0.9,zh-TW;q=0.8" # # "Host": "www.google.com", # # 'Referer':" https://www.google.com/" # }) # #except BaseException: