def httpRequest(self, url):
    """Fetch *url* over HTTP, honoring the tracker proxy setting.

    ``config.trackers_proxy`` selects the transport:
    "disable" -> direct connection, "tor" -> the site's Tor SOCKS proxy,
    anything else -> a "host:port" SOCKS5 proxy string.
    Returns the open response object.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
    }
    request = urllib.request.Request(url, headers=browser_headers)

    if config.trackers_proxy == "disable":
        # Direct connection: shorter timeout than the proxied paths.
        return urllib.request.urlopen(request, timeout=25)

    # Both remaining modes tunnel through a SOCKS5 proxy; only the
    # host/port source differs.
    if config.trackers_proxy == "tor":
        tor_manager = self.site.connection_server.tor_manager
        socks_host = tor_manager.proxy_ip
        socks_port = tor_manager.proxy_port
    else:
        host, port = config.trackers_proxy.split(":")
        socks_host = host
        socks_port = int(port)

    socks_opener = urllib.request.build_opener(
        sockshandler.SocksiPyHandler(socks.SOCKS5, socks_host, socks_port))
    return socks_opener.open(request, timeout=50)
def _additional_handlers(self):
    """Assemble the extra urllib2 handlers for this channel.

    Adds a SOCKS handler when the session carries a proxy setting
    (raising ChannelException on a malformed proxy spec), plus an
    HTTPS handler whose SSL context deliberately skips certificate
    verification.
    """
    handlers = []

    if self.session.get('proxy'):
        protocol, host, port = self._get_proxy()
        if not (protocol and host and port):
            raise ChannelException(messages.channels.error_proxy_format)
        handlers.append(
            sockshandler.SocksiPyHandler(protocol, host, port)
        )

    # Skip certificate checks: intentionally disable hostname checking
    # and certificate verification for the HTTPS transport.
    insecure_ctx = ssl.create_default_context()
    insecure_ctx.check_hostname = False
    insecure_ctx.verify_mode = ssl.CERT_NONE
    handlers.append(urllib2.HTTPSHandler(context=insecure_ctx))

    return handlers
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
              cookiejar_send=None, cookiejar_receive=None, use_tor=True,
              return_response=False):
    '''Fetch *url* and return the decoded body bytes.

    When cookiejar_send is set to a CookieJar object, those cookies will be
    sent in the request (but cookies in response will not be merged into it).
    When cookiejar_receive is set to a CookieJar object, cookies received in
    the response will be merged into the object (nothing will be sent from it).
    When both are set to the same object, cookies will be sent from the object,
    and response cookies will be merged into it.

    If return_response is True, returns ``(content, response)`` instead.
    '''
    headers = dict(headers)  # Note: Calling dict() on a dict will make a copy

    # Fix: only advertise brotli when the decoder is available (matches
    # fetch_url_response); otherwise a server may reply with 'br' content
    # that decode_content() cannot handle.
    if have_brotli:
        headers['Accept-Encoding'] = 'gzip, br'
    else:
        headers['Accept-Encoding'] = 'gzip'

    # prevent python version being leaked by urllib if User-Agent isn't provided
    # (urllib will use ex. Python-urllib/3.6 otherwise)
    if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
        headers['User-Agent'] = 'Python-urllib'

    method = "GET"
    if data is not None:
        method = "POST"
        if isinstance(data, str):
            data = data.encode('ascii')
        elif not isinstance(data, bytes):
            data = urllib.parse.urlencode(data).encode('ascii')

    start_time = time.time()

    if cookiejar_send is not None or cookiejar_receive is not None:
        # Use urllib: the only path with cookie support.
        req = urllib.request.Request(url, data=data, headers=headers)

        cookie_processor = HTTPAsymmetricCookieProcessor(
            cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)

        if use_tor and settings.route_tor:
            opener = urllib.request.build_opener(
                sockshandler.SocksiPyHandler(
                    socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150),
                cookie_processor)
        else:
            opener = urllib.request.build_opener(cookie_processor)

        response = opener.open(req, timeout=timeout)
        response_time = time.time()
        content = response.read()
    else:
        # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't
        # have easy support for them.
        pool = get_pool(use_tor and settings.route_tor)

        response = pool.request(method, url, headers=headers,
                                timeout=timeout, preload_content=False,
                                decode_content=False)
        response_time = time.time()
        content = response.read()
        response.release_conn()

    read_finish = time.time()
    if report_text:
        print(report_text, ' Latency:', round(response_time - start_time, 3),
              ' Read time:', round(read_finish - response_time, 3))
    content = decode_content(
        content, response.getheader('Content-Encoding', default='identity'))

    if return_response:
        return content, response
    return content
def fetch_url_response(url, headers=(), timeout=15, data=None,
                       cookiejar_send=None, cookiejar_receive=None,
                       use_tor=True, max_redirects=None):
    '''
    returns response, cleanup_function

    When cookiejar_send is set to a CookieJar object, those cookies will be
    sent in the request (but cookies in response will not be merged into it).
    When cookiejar_receive is set to a CookieJar object, cookies received in
    the response will be merged into the object (nothing will be sent from it).
    When both are set to the same object, cookies will be sent from the object,
    and response cookies will be merged into it.
    '''
    headers = dict(headers)  # dict() on a dict makes a defensive copy
    headers['Accept-Encoding'] = 'gzip, br' if have_brotli else 'gzip'

    # Avoid leaking the Python version through urllib's default UA string
    # (e.g. Python-urllib/3.6) when the caller supplied no User-Agent.
    if not any(k in headers for k in ('User-Agent', 'user-agent', 'User-agent')):
        headers['User-Agent'] = 'Python-urllib'

    if data is None:
        method = "GET"
    else:
        method = "POST"
        if isinstance(data, str):
            data = data.encode('utf-8')
        elif not isinstance(data, bytes):
            data = urllib.parse.urlencode(data).encode('utf-8')

    if cookiejar_send is not None or cookiejar_receive is not None:
        # urllib path: the only one with cookie support.
        request = urllib.request.Request(url, data=data, headers=headers)
        cookie_processor = HTTPAsymmetricCookieProcessor(
            cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)

        if use_tor and settings.route_tor:
            opener = urllib.request.build_opener(
                sockshandler.SocksiPyHandler(
                    socks.PROXY_TYPE_SOCKS5, "127.0.0.1", settings.tor_port),
                cookie_processor)
        else:
            opener = urllib.request.build_opener(cookie_processor)

        response = opener.open(request, timeout=timeout)
        cleanup_func = (lambda r: None)  # nothing to release for urllib
    else:
        # urllib3 pool path. Cookies can't be used since urllib3 doesn't have
        # easy support for them.
        # default: Retry.DEFAULT = Retry(3) (in connectionpool.py in urllib3)
        # According to the documentation for urlopen, a redirect counts as a
        # retry, so there are 3 redirects max by default.
        if max_redirects:
            retries = urllib3.Retry(3 + max_redirects, redirect=max_redirects)
        else:
            retries = urllib3.Retry(3)

        pool = get_pool(use_tor and settings.route_tor)
        response = pool.request(method, url, headers=headers, body=data,
                                timeout=timeout, preload_content=False,
                                decode_content=False, retries=retries)
        cleanup_func = (lambda r: r.release_conn())

    return response, cleanup_func
def __init__(self, proxy_address='127.0.0.1', proxy_port=9050, *, user_agent=None):
    """Create a dummy browser whose requests are tunnelled through the
    SOCKS5 proxy at ``(proxy_address, proxy_port)``.

    *user_agent* (keyword-only) is stored for later use; ``None`` keeps
    the default.
    """
    logger.info('Starting dummy browser')
    socks5_handler = sockshandler.SocksiPyHandler(
        socks.SOCKS5, proxy_address, proxy_port)
    self._user_agent = user_agent
    self._opener = urllib.request.build_opener(socks5_handler)
    self._open = True  # browser starts in the "open" state
    logger.info('Started dummy browser')
def test_urllib2_socks5_handler(self):
    """End-to-end SocksiPyHandler check against the local SOCKS5 proxy:
    the response body, status code, and the Host / User-Agent headers
    seen by the test server must all match expectations."""
    expected_body = b'zzz'
    self.test_server.response['data'] = expected_body

    socks5_handler = sockshandler.SocksiPyHandler(
        socks.SOCKS5, PROXY_HOST_IP, SOCKS5_PROXY_PORT)
    opener = urllib2.build_opener(socks5_handler)
    response = opener.open(self.test_server.get_url())
    received_body = response.read()

    seen_headers = self.test_server.request['headers']
    self.assertTrue(seen_headers['user-agent'].startswith('Python-urllib'))
    self.assertEqual('%s:%d' % (TEST_SERVER_HOST, TEST_SERVER_PORT),
                     seen_headers['host'])
    self.assertEqual(200, response.getcode())
    self.assertEqual(expected_body, received_body)
def _additional_handlers(self):
    """Return the extra urllib handlers for this channel.

    Empty list when no proxy is configured in the session; otherwise a
    single SOCKS handler. Raises ChannelException when the proxy spec
    is malformed (missing protocol, host, or port).
    """
    if not self.session.get('proxy'):
        return []

    protocol, host, port = self._get_proxy()
    if not (protocol and host and port):
        raise ChannelException(messages.channels.error_proxy_format)

    return [sockshandler.SocksiPyHandler(protocol, host, port)]
def urllib2_handler_SOCKS5_test():
    """Round-trip an HTTP request through the local SOCKS5 proxy on
    port 1081 and expect a 200 from the echo-IP service."""
    socks5_handler = sockshandler.SocksiPyHandler(
        socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 1081)
    response = urllib2.build_opener(socks5_handler).open(
        "http://api.externalip.net/ip/")
    assert response.getcode() == 200
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
              cookiejar_send=None, cookiejar_receive=None, use_tor=True,
              return_response=False, debug_name=None):
    '''Fetch *url* and return the decoded body bytes.

    When cookiejar_send is set to a CookieJar object, those cookies will be
    sent in the request (but cookies in response will not be merged into it).
    When cookiejar_receive is set to a CookieJar object, cookies received in
    the response will be merged into the object (nothing will be sent from it).
    When both are set to the same object, cookies will be sent from the object,
    and response cookies will be merged into it.

    Raises FetchError for HTTP status >= 400 (with the blocking IP extracted
    for Google's 429 "unusual traffic" page). If return_response is True,
    returns ``(content, response)``.
    '''
    headers = dict(headers)  # Note: Calling dict() on a dict will make a copy
    if have_brotli:
        headers['Accept-Encoding'] = 'gzip, br'
    else:
        headers['Accept-Encoding'] = 'gzip'

    # prevent python version being leaked by urllib if User-Agent isn't provided
    # (urllib will use ex. Python-urllib/3.6 otherwise)
    if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
        headers['User-Agent'] = 'Python-urllib'

    method = "GET"
    if data is not None:
        method = "POST"
        if isinstance(data, str):
            data = data.encode('ascii')
        elif not isinstance(data, bytes):
            data = urllib.parse.urlencode(data).encode('ascii')

    start_time = time.time()

    if cookiejar_send is not None or cookiejar_receive is not None:
        # Use urllib: the only path with cookie support.
        req = urllib.request.Request(url, data=data, headers=headers)

        cookie_processor = HTTPAsymmetricCookieProcessor(
            cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)

        if use_tor and settings.route_tor:
            opener = urllib.request.build_opener(
                sockshandler.SocksiPyHandler(
                    socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150),
                cookie_processor)
        else:
            opener = urllib.request.build_opener(cookie_processor)

        response = opener.open(req, timeout=timeout)
        response_time = time.time()
        content = response.read()
    else:
        # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't
        # have easy support for them.
        pool = get_pool(use_tor and settings.route_tor)
        response = pool.request(method, url, headers=headers,
                                timeout=timeout, preload_content=False,
                                decode_content=False)
        response_time = time.time()
        content = response.read()
        response.release_conn()

    # Google's rate-limit page comes back as a 429 HTML document; surface
    # the IP address it blames so the caller can report it.
    if (response.status == 429
            and content.startswith(b'<!DOCTYPE')
            and b'Our systems have detected unusual traffic' in content):
        ip = re.search(
            br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)', content)
        ip = ip.group(1).decode('ascii') if ip else None
        raise FetchError('429', reason=response.reason, ip=ip)
    elif response.status >= 400:
        raise FetchError(str(response.status), reason=response.reason, ip=None)

    read_finish = time.time()
    if report_text:
        print(report_text, ' Latency:', round(response_time - start_time, 3),
              ' Read time:', round(read_finish - response_time, 3))

    content = decode_content(
        content, response.getheader('Content-Encoding', default='identity'))

    if settings.debugging_save_responses and debug_name is not None:
        save_dir = os.path.join(settings.data_dir, 'debug')
        # Fix: exist_ok=True removes the check-then-create race (TOCTOU) the
        # original os.path.exists() guard had when two fetches save debug
        # output concurrently.
        os.makedirs(save_dir, exist_ok=True)
        with open(os.path.join(save_dir, debug_name), 'wb') as f:
            f.write(content)

    if return_response:
        return content, response
    return content
def urllib2_handler_HTTP_test():
    """Round-trip an HTTP request through the local HTTP proxy on
    port 8080 and expect a 200 from the echo-IP service."""
    http_proxy_handler = sockshandler.SocksiPyHandler(
        socks.HTTP, "127.0.0.1", 8080)
    response = urllib2.build_opener(http_proxy_handler).open(
        "http://api.externalip.net/ip/")
    assert response.getcode() == 200
def build_urllib_opener(
        proxies=None, ssl_check_hostname=None,
        extra_handlers=(), extra_pre_handlers=()):
    """
    A replacement to :py:func:`urllib.request.build_opener` that takes care
    of using current user's global settings (Keypirinha and/or system's)
    regarding network connections, by inserting and configuring one or
    several connection handlers (derived from
    :py:class:`urllib.request.BaseHandler`).

    Examples::

        # example 1
        opener = build_urllib_opener()
        with opener.open("http://httpbin.org/user-agent") as response:
            print(response.read())

        # example 2: HTTP proxy
        proxies = {'http': "http://*****:*****@127.0.0.1:8080"}
        opener = build_urllib_opener(proxies)
        with opener.open("http://httpbin.org/ip") as response:
            print(response.read())

    Args:
        proxies (dict): A dictionary of proxies to pass to the constructor
            of :py:class:`urllib.request.ProxyHandler`, if any. Notes:

            * ``None`` (default; **recommended**) means proxies configured
              by the user at Keypirinha's level, or by default at system
              level will be used.
            * An empty dictionary (i.e. ``{}``) means **no** proxy will be
              configured regardless of user or machine settings. Note that
              going against user's will is against Keypirinha's design
              policy!
            * See the notes below about ``SOCKS`` proxies.
            * See :py:func:`proxies_list_to_dict` if you need to convert a
              list of proxies URLs into a dictionary.

        extra_handlers (list): A list/tuple of extra handlers to **append**
            to the final handlers chain before passing it to
            :py:func:`urllib.request.build_opener`.

        extra_pre_handlers (list): A list/tuple of extra handlers to
            **prepend** to the final handlers chain before passing it to
            :py:func:`urllib.request.build_opener`.
            **CAUTION:** This parameter is here for convenience and you
            should use it only if you know what you are doing as it may
            interfere with the handlers added by this function.

        ssl_check_hostname (bool): Should the hostname be checked against
            received security certificate?
            This argument is equivalent to tweaking with
            :py:attr:`ssl.SSLContext.check_hostname` and
            :py:attr:`ssl.SSLContext.verify_mode`.
            Default behavior of the ``urllib`` module (i.e. ``None`` value)
            is to check the hostname unless explicitly specified here
            (boolean), in which case this function will either add an
            :py:class:`urllib.request.HTTPSHandler` handler with the
            appropriate arguments to the chain, or, if caller already added
            a :py:class:`urllib.request.HTTPSHandler` handler (either in
            the *extra_handlers* or *extra_pre_handlers* list), it will be
            modified accordingly.

    Returns:
        UrllibOpener: A
        :py:class:`urllib.request.OpenerDirector`-compatible opener object.

    Note:
        Notes about ``SOCKS`` proxy support:

        * Support for ``SOCKS`` proxy (v4 and v5) is **experimental** and
          uses the `PySocks <https://github.com/Anorov/PySocks>`_
          third-party module under the hood.
        * DNS requests do not go through the proxy server.
        * IPv6 connections through the proxy server are not supported.
        * Tests have shown that if proxies for several schemes have been
          specified, ``UNKNOWN_PROTOCOL`` SSL error may occur under some
          circumstances. For that reason, if a ``SOCKS`` proxy is
          specified, it takes precedence over the other proxy servers that
          might be in the dictionary as well so they will be purely ignored
          by this function in favor of the ``SOCKS`` proxy.
    """
    # Fix: extra_handlers/extra_pre_handlers previously defaulted to mutable
    # lists ([]), a classic shared-default pitfall; empty tuples are
    # backward compatible since both are only iterated and unpacked.

    def _has_handler(handler_type):
        # Return the first caller-supplied handler of the given type, if any.
        for h in (*extra_pre_handlers, *extra_handlers):
            if isinstance(h, handler_type):
                return h
        return None

    own_handlers = []

    # get proxies from the application settings if needed
    if proxies is None:
        proxies = proxies_to_dict(kp.settings().get_multiline(
            "proxy", section="network", fallback=[], keep_empty_lines=False))

    # proxy servers
    if proxies is not None:
        # socks proxy
        # in case user specified a "socks" proxy, we have to extract it from
        # the dict and insert it as a different handler in the final handlers
        # chain since it is not supported by the standard urllib module
        got_socks_proxy = False
        for scheme, proxy_url in proxies.items():
            scheme_lc = scheme.lower()
            if scheme_lc not in ("socks", "socks4", "socks5"):
                continue
            if scheme_lc == "socks4":
                proxy_type = socks.PROXY_TYPE_SOCKS4
            else:
                proxy_type = socks.PROXY_TYPE_SOCKS5

            proxy_info = urllib.parse.urlsplit(proxy_url)
            if not proxy_info.hostname:
                raise ValueError("malformed proxy url: {}".format(proxy_url))
            if not proxy_info.port:
                raise ValueError(
                    "port number required for proxy: {}".format(proxy_url))

            # SOCKS5 only: DNS queries should be performed on the remote side
            # (default behavior in "socks" module). Unfortunately, in
            # practice, that does not prevent DNS requests to be made outside
            # of the SOCKS tunnel as it would require monkey-patching the
            # "socket" module and would not work in some cases anyway.
            # More info: https://github.com/Anorov/PySocks/issues/22
            proxy_rdns = True

            # note to self: sockshandler.SocksiPyHandler is derived from
            # urllib.request.HTTPSHandler!!!
            own_handlers.append(sockshandler.SocksiPyHandler(
                proxy_type, proxy_info.hostname, proxy_info.port,
                proxy_rdns, proxy_info.username, proxy_info.password))

            got_socks_proxy = True
            break

        # Tests have shown that if mixed proxies are specified (i.e. "http" +
        # "https" + "socks") and there's a SOCKS proxy in the list, "SSL:
        # UNKNOWN_PROTOCOL" errors occur with HTTPS urls. As a result, when a
        # SOCKS proxy is specified, it must be the only proxy in the list.
        if not got_socks_proxy:
            own_handlers.append(urllib.request.ProxyHandler(proxies))

    if ssl_check_hostname is None:
        # allow user to override default behavior if needed
        ssl_check_hostname = kp.settings().get_bool(
            "ssl_check_hostname", section="network", fallback=None)

    if ssl_check_hostname is not None:
        https_handler = _has_handler(urllib.request.HTTPSHandler)
        if https_handler is not None:
            # Tweak the caller-supplied HTTPS handler in place.
            if ssl_check_hostname:
                https_handler._context.check_hostname = True
                https_handler._context.verify_mode = ssl.CERT_REQUIRED
            else:
                https_handler._context.check_hostname = False
                https_handler._context.verify_mode = ssl.CERT_NONE
        else:
            ssl_ctx = ssl.create_default_context(
                purpose=ssl.Purpose.SERVER_AUTH)
            if ssl_check_hostname:
                # This is the default behavior when create_default_context()
                # is passed the SERVER_AUTH purpose.
                # "Explicit is better than implicit" (c)
                ssl_ctx.check_hostname = True
                ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            else:
                ssl_ctx.check_hostname = False
                ssl_ctx.verify_mode = ssl.CERT_NONE
            own_handlers.append(urllib.request.HTTPSHandler(context=ssl_ctx))

    return UrllibOpener(urllib.request.build_opener(
        *extra_pre_handlers, *own_handlers, *extra_handlers))
def urllib2_handler_SOCKS5_test():
    """Round-trip an HTTP request through the local SOCKS5 proxy on
    port 1081 and expect a 200 from ifconfig.me."""
    socks5_handler = sockshandler.SocksiPyHandler(
        socks.SOCKS5, "127.0.0.1", 1081)
    response = urllib2.build_opener(socks5_handler).open(
        "http://ifconfig.me/ip")
    assert response.getcode() == 200