def fetch(self, request, opener=None, summary=None):
    """Open *request* and return a ``(response, body)`` tuple.

    Returns ``(None, None)`` without opening anything when HTTP access is
    disabled on this instance.

    When *opener* is ``None`` a minimal ``OpenerDirector`` is built with only
    ``HTTPDefaultErrorHandler`` and ``HTTPSHandler`` installed.  The elapsed
    time and the request URL are written to the context logger.  When HTTP
    logging is enabled, the URL, the request data and the response body are
    also written to a timestamped file under ``<config dir>/http-log``;
    *summary*, if given, is appended to the file name after being passed
    through ``_safe_str``.
    """
    if not self.__enable_http:
        return (None, None)

    if opener is None:
        opener = OpenerDirector()
        opener.add_handler(HTTPDefaultErrorHandler())
        opener.add_handler(HTTPSHandler())

    # Measure wall-clock time: time.clock() reported CPU time on Unix (which
    # excludes time spent waiting on the network) and no longer exists in
    # Python 3.8+.
    started = time.time()
    response = opener.open(request)
    body = response.read()
    elapsed = timedelta(seconds=time.time() - started)

    url = request.get_full_url()
    self.__context.get_logger().info('HTTP time: %s\n%s' % (elapsed, url))

    if self.__log_http:
        log_dir = os.path.join(self.__context.get_config_dir(), 'http-log')
        makedirs(log_dir)
        log_file = os.path.join(
            log_dir, datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-%f'))
        if summary is not None:
            log_file += '-' + _safe_str(summary)
        # NOTE(review): get_data() is the legacy Request accessor; confirm the
        # targeted Python version before switching to request.data.
        sections = [
            url,
            request.get_data() or 'No request data',
            body or 'No response body',
        ]
        # The context manager closes the file even if the write fails.
        with open(log_file, 'w') as fp:
            fp.write('\n\n'.join(sections))

    return (response, body)
def test_proxy_https(self):
    # An https proxy mapping must redirect the request to the proxy host
    # before the https_open handler is invoked.
    director = OpenerDirector()
    director.add_handler(
        urllib.request.ProxyHandler({"https": "proxy.example.com:3128"}))
    mock_handlers = add_ordered_mock_handlers(
        director, [[("https_open", "return response")]])

    request = Request("https://www.example.com/")
    self.assertEqual(request.get_host(), "www.example.com")
    director.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    # Only the (handler, method name) prefix of each recorded call matters.
    recorded = [call[0:2] for call in director.calls]
    self.assertEqual([(mock_handlers[0], "https_open")], recorded)
def post_info(url, **kwargs):
    """POST *kwargs* to *url* as a URL-encoded form and return the response.

    The request is sent through a bare ``OpenerDirector`` that only has an
    ``HTTPHandler`` installed, and whatever ``opener.open`` returns is handed
    back to the caller unchanged.
    """
    opener = OpenerDirector()
    opener.add_handler(HTTPHandler())

    # urlencode() yields ASCII-only text.  Python 3 requires POST data to be
    # bytes (a str raises TypeError in the HTTP handler), so encode it here;
    # on Python 2 encoding an ASCII str is a no-op.
    form_data = urlencode(kwargs).encode('ascii')
    req = Request(
        url,
        data=form_data,
        headers={
            'Content-Type': 'application/x-www-form-urlencoded',
        },
    )
    return opener.open(req)
def test_proxy_https(self):
    # With an https proxy configured, opening the request must switch its
    # host to the proxy and dispatch exactly one https_open call.
    proxy_map = {"https": "proxy.example.com:3128"}
    director = OpenerDirector()
    director.add_handler(urllib.request.ProxyHandler(proxy_map))
    spec = [[("https_open", "return response")]]
    mock_handlers = add_ordered_mock_handlers(director, spec)

    request = Request("https://www.example.com/")
    self.assertEqual(request.get_host(), "www.example.com")
    director.open(request)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")

    expected_calls = [(mock_handlers[0], "https_open")]
    self.assertEqual(expected_calls, [entry[0:2] for entry in director.calls])
def test_proxy_no_proxy(self): os.environ['no_proxy'] = 'python.org' o = OpenerDirector() ph = urllib.request.ProxyHandler(dict(http="proxy.example.com")) o.add_handler(ph) req = Request("http://www.perl.org/") self.assertEqual(req.get_host(), "www.perl.org") r = o.open(req) self.assertEqual(req.get_host(), "proxy.example.com") req = Request("http://www.python.org") self.assertEqual(req.get_host(), "www.python.org") r = o.open(req) self.assertEqual(req.get_host(), "www.python.org") del os.environ['no_proxy']
def test_basic_auth(self, quote_char='"'):
    # Run the shared basic-auth scenario against a mock HTTP handler that
    # answers 401 with a Basic challenge whose realm is wrapped in quote_char.
    realm = "ACME Widget Store"
    challenge = 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' % (
        quote_char, realm, quote_char)

    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    protected_url = "http://acme.example.com/protected"
    self._test_basic_auth(
        opener, auth_handler, "Authorization",
        realm, http_handler, password_manager,
        protected_url,
        protected_url,
    )
def test_proxy_basic_auth(self):
    # Run the shared basic-auth scenario through an http proxy whose mock
    # handler answers 407 with a Proxy-Authenticate Basic challenge.
    realm = "ACME Networks"
    challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm

    opener = OpenerDirector()
    opener.add_handler(
        urllib.request.ProxyHandler({"http": "proxy.example.com:3128"}))
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(407, challenge)
    for handler in (auth_handler, http_handler):
        opener.add_handler(handler)

    self._test_basic_auth(
        opener, auth_handler, "Proxy-authorization",
        realm, http_handler, password_manager,
        "http://acme.example.com:3128/protected",
        "proxy.example.com:3128",
    )
def test_badly_named_methods(self):
    # Regression test for three methods whose names accidentally follow the
    # handler naming conventions (*_open() / *_request() / *_response()).
    # They used to be dispatched to, causing a TypeError in real code; with
    # these mocks, returning self would instead cause either no exception or
    # an AttributeError.
    from urllib.error import URLError

    director = OpenerDirector()
    accidental_methods = [
        [("do_open", "return self"), ("proxy_open", "return self")],
        [("redirect_request", "return self")],
    ]
    add_ordered_mock_handlers(director, accidental_methods)
    director.add_handler(urllib.request.UnknownHandler())

    for scheme in ("do", "proxy", "redirect"):
        target = scheme + "://example.com/"
        self.assertRaises(URLError, director.open, target)
def test_handler_order(self):
    # Handlers must be invoked according to handler_order rather than the
    # order in which they were registered.
    director = OpenerDirector()
    registered = []
    specs = [
        ([("http_open", "return self")], 500),
        (["http_open"], 0),
    ]
    for method_spec, order in specs:
        class MockHandlerSubclass(MockHandler):
            pass

        handler = MockHandlerSubclass(method_spec)
        handler.handler_order = order
        registered.append(handler)
        director.add_handler(handler)

    director.open("http://example.com/")

    # The handler registered second has the lower order, so it is called first.
    calls = director.calls
    self.assertEqual(calls[0][0], registered[1])
    self.assertEqual(calls[1][0], registered[0])
def test_basic_auth(self, quote_char='"'):
    # Exercise HTTP basic auth against a mock handler that replies 401 with a
    # Basic challenge; quote_char is the character placed around the realm.
    realm = "ACME Widget Store"
    header = 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' % (
        quote_char, realm, quote_char)

    opener = OpenerDirector()
    password_manager = MockPasswordManager()
    auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(401, header)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    url = "http://acme.example.com/protected"
    self._test_basic_auth(
        opener, auth_handler, "Authorization", realm,
        http_handler, password_manager, url, url,
    )
def test_handler_order(self):
    # Register two mock handlers with different handler_order values and check
    # that the call sequence follows the sort order, not registration order.
    opener = OpenerDirector()
    added = []
    for method_spec, priority in (
        ([("http_open", "return self")], 500),
        (["http_open"], 0),
    ):
        class MockHandlerSubclass(MockHandler):
            pass

        mock = MockHandlerSubclass(method_spec)
        mock.handler_order = priority
        added.append(mock)
        opener.add_handler(mock)

    opener.open("http://example.com/")

    # Reverse of registration order, thanks to the handlers' sort order.
    self.assertEqual(opener.calls[0][0], added[1])
    self.assertEqual(opener.calls[1][0], added[0])
def test_badly_named_methods(self):
    # Work-around check for three methods that accidentally match the handler
    # method naming conventions (*_open() / *_request() / *_response()).
    # Dispatching to them used to raise TypeError in real code; here the mock
    # methods return self, which would lead to no exception or AttributeError.
    from urllib.error import URLError

    opener = OpenerDirector()
    add_ordered_mock_handlers(opener, [
        [("do_open", "return self"), ("proxy_open", "return self")],
        [("redirect_request", "return self")],
    ])
    opener.add_handler(urllib.request.UnknownHandler())

    for prefix in ("do", "proxy", "redirect"):
        self.assertRaises(URLError, opener.open, prefix + "://example.com/")
def test_proxy_https_proxy_authorization(self):
    # When an https request goes through a proxy, Proxy-Authorization must
    # stay on the request object and must not be among the headers handed to
    # the https connection.
    director = OpenerDirector()
    director.add_handler(
        urllib.request.ProxyHandler({"https": "proxy.example.com:3128"}))
    https_handler = MockHTTPSHandler()
    director.add_handler(https_handler)

    request = Request("https://www.example.com/")
    proxy_auth = ("Proxy-Authorization", "FooBar")
    user_agent = ("User-Agent", "Grail")
    for header_name, header_value in (proxy_auth, user_agent):
        request.add_header(header_name, header_value)
    self.assertEqual(request.get_host(), "www.example.com")
    self.assertTrue(request._tunnel_host is None)

    director.open(request)

    # The connection's req_headers lack Proxy-Authorization but still carry
    # the ordinary headers; the request itself keeps the proxy credentials.
    self.assertFalse(proxy_auth in https_handler.httpconn.req_headers)
    self.assertTrue(user_agent in https_handler.httpconn.req_headers)
    self.assertFalse(request._tunnel_host is None)
    self.assertEqual(request.get_host(), "proxy.example.com:3128")
    self.assertEqual(request.get_header("Proxy-authorization"), "FooBar")
def test_proxy_basic_auth(self):
    # Exercise proxy basic auth: requests go through an http proxy and the
    # mock handler replies 407 with a Proxy-Authenticate Basic challenge.
    realm = "ACME Networks"

    opener = OpenerDirector()
    proxy_handler = urllib.request.ProxyHandler(
        {"http": "proxy.example.com:3128"})
    opener.add_handler(proxy_handler)

    password_manager = MockPasswordManager()
    auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
    http_handler = MockHTTPHandler(
        407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm)
    opener.add_handler(auth_handler)
    opener.add_handler(http_handler)

    self._test_basic_auth(
        opener, auth_handler, "Proxy-authorization", realm,
        http_handler, password_manager,
        "http://acme.example.com:3128/protected",
        "proxy.example.com:3128",
    )
def build_tor_opener(guard, hops_count=3, debuglevel=0):
    """Return an ``OpenerDirector`` that uses the Tor HTTP/HTTPS handlers.

    A fixed set of stock handlers is installed first, followed by
    ``TorHTTPHandler`` and ``TorHTTPSHandler``, each constructed with
    *guard*, *hops_count* and *debuglevel*.  The opener's ``addheaders`` list
    is replaced with an empty list before it is returned.
    """
    director = OpenerDirector()

    stock_handlers = (
        ProxyHandler,
        UnknownHandler,
        HTTPDefaultErrorHandler,
        HTTPRedirectHandler,
        HTTPErrorProcessor,
    )
    for handler_cls in stock_handlers:
        director.add_handler(handler_cls())

    for tor_cls in (TorHTTPHandler, TorHTTPSHandler):
        director.add_handler(
            tor_cls(guard, hops_count, debuglevel=debuglevel))

    director.addheaders = []
    return director
def build_opener(*handlers, **kw):
    """Create an opener object from a list of handlers.

    The opener installs a set of default handlers (HTTP, FTP, file, proxy,
    redirect and error handling) plus an ``HTTPSContextHandler`` built from
    the optional ``ssl_context`` keyword.  A default is left out when any of
    the supplied *handlers* is an instance or a subclass of it.  Supplied
    handlers may be classes, in which case they are instantiated with no
    arguments, or ready-made instances.
    """
    def isclass(obj):
        return isinstance(obj, class_type_) or hasattr(obj, "__bases__")

    default_classes = [
        ProxyHandler, UnknownHandler, HTTPHandler,
        HTTPDefaultErrorHandler, HTTPRedirectHandler,
        FTPHandler, FileHandler, HTTPErrorProcessor,
    ]
    check_classes = default_classes + [HTTPSContextHandler]

    # Every default that a caller-supplied handler replaces, whether that
    # handler was given as a class or as an instance.
    skip = {
        klass
        for klass in check_classes
        for check in handlers
        if (issubclass(check, klass) if isclass(check)
            else isinstance(check, klass))
    }

    opener = OpenerDirector()
    for klass in default_classes:
        if klass in skip:
            continue
        opener.add_handler(klass())

    # Pick up the SSL context from the keyword settings and hand it to the
    # HTTPS handler, unless the caller supplied a replacement for it.
    ssl_context = kw.get('ssl_context')
    if HTTPSContextHandler not in skip:
        opener.add_handler(HTTPSContextHandler(ssl_context))

    for supplied in handlers:
        opener.add_handler(supplied() if isclass(supplied) else supplied)
    return opener
def build_opener(*handlers, **kw):
    """Create an opener object from a list of handlers.

    Several default handlers are used, including support for HTTP and FTP,
    together with an ``HTTPSContextHandler`` created from the ``ssl_context``
    keyword (``None`` when absent).  If any handler passed as an argument is
    a subclass or an instance of a default handler, that default is not
    installed.
    """
    def isclass(obj):
        return isinstance(obj, class_type_) or hasattr(obj, "__bases__")

    def matches(candidate, klass):
        # A supplied handler may be a class or an instance.
        if isclass(candidate):
            return issubclass(candidate, klass)
        return isinstance(candidate, klass)

    defaults = (
        ProxyHandler, UnknownHandler, HTTPHandler,
        HTTPDefaultErrorHandler, HTTPRedirectHandler,
        FTPHandler, FileHandler, HTTPErrorProcessor,
    )

    # Defaults (including the HTTPS handler) overridden by the caller.
    skip = set()
    for klass in defaults + (HTTPSContextHandler,):
        for candidate in handlers:
            if matches(candidate, klass):
                skip.add(klass)

    opener = OpenerDirector()
    for klass in defaults:
        if klass not in skip:
            opener.add_handler(klass())

    # Pick up SSL context from keyword settings.
    ssl_context = kw.get('ssl_context')

    # Add the HTTPS handler with ssl_context.
    if HTTPSContextHandler not in skip:
        opener.add_handler(HTTPSContextHandler(ssl_context))

    for handler in handlers:
        if isclass(handler):
            handler = handler()
        opener.add_handler(handler)
    return opener
def opener_setup():
    """Build an opener from a fixed handler list and install it globally.

    The opener gets one instance each of ``ProxyHandler``,
    ``UnknownHandler``, ``HTTPHandler``, ``HTTPDefaultErrorHandler`` and
    ``DataHandler``, and is then passed to ``install_opener``.
    """
    from urllib.request import (
        DataHandler,
        HTTPDefaultErrorHandler,
        HTTPHandler,
        OpenerDirector,
        ProxyHandler,
        UnknownHandler,
    )

    handler_classes = (
        ProxyHandler,
        UnknownHandler,
        HTTPHandler,
        HTTPDefaultErrorHandler,
        DataHandler,
    )
    director = OpenerDirector()
    for handler_cls in handler_classes:
        director.add_handler(handler_cls())
    install_opener(director)
type=socket.SOCK_STREAM, proto=0, fileno=None) self.sock.settimeout(self.timeout) self.sock.connect(self.socket_file) def socket_open(self, req): socket_file, path = req.selector.split(':', 1) req.host = socket_file req.selector = path return self.do_open(self.SocketFileToHttpConnectionAdaptor, req) better_urllib_get = OpenerDirector() better_urllib_get.addheaders = DEFAULT_HEADERS.copy() better_urllib_get.add_handler(HTTPHandler()) better_urllib_get.add_handler(HTTPSHandler()) better_urllib_get.add_handler(SocketFileHandler()) # Util functions ############################################################################################# @lru_cache() def get_url(url): response = better_urllib_get.open(url, timeout=timeout) return process_urllib_response(response), response.status def process_urllib_response(response): response_bytes = response.read()
auth_token = self._get_outh2_token(www_authenticate_header) request.add_unredirected_header('Authorization', 'Bearer ' + auth_token) return self.parent.open(request, timeout=request.timeout) # Got some help from this example https://gist.github.com/FiloSottile/2077115 class HeadRequest(Request): def get_method(self): return "HEAD" better_urllib_get = OpenerDirector() better_urllib_get.addheaders = DEFAULT_HEADERS.copy() better_urllib_get.add_handler(HTTPHandler()) better_urllib_get.add_handler(HTTPSHandler()) better_urllib_get.add_handler(HTTPRedirectHandler()) better_urllib_get.add_handler(SocketFileHandler()) better_urllib_get.add_handler(Oauth2TokenAuthHandler()) class RegistryError(Exception): def __init__(self, response): self.response_obj = response # Util functions ############################################################################################# def parse_thresholds(spec, include_units=True, units_required=True): """
def build_test_opener(*handler_instances):
    """Return a fresh ``OpenerDirector`` holding exactly the given handlers.

    Handlers are added in the order supplied; no default handlers are
    installed.
    """
    director = OpenerDirector()
    for handler in handler_instances:
        director.add_handler(handler)
    return director
def build_test_opener(*handler_instances):
    """Build an ``OpenerDirector`` containing only *handler_instances*.

    Each handler is registered with ``add_handler`` in argument order and the
    resulting opener is returned.
    """
    test_opener = OpenerDirector()
    for instance in handler_instances:
        test_opener.add_handler(instance)
    return test_opener
def resolve(self, item, captcha_cb=None, select_cb=None):
    """Resolve *item* to a downloadable URL by submitting the page's captcha.

    The page at ``item['url']`` is fetched and its download form is located.
    The captcha image is downloaded to ``<tmp_dir>/captcha.png`` and
    *captcha_cb* is called with ``{'id': '0', 'img': <path>}``; the code it
    returns is posted to the form's action URL.

    Args:
        item: mapping with a ``'url'`` key; a copy is modified, not the
            original.
        captcha_cb: callable returning the captcha code, or a falsy value to
            give up.
        select_cb: not used here except to be passed on when retrying.

    Returns:
        The copied item with ``'url'`` replaced by the file URL, or ``None``
        when the page cannot be fetched, no form/captcha is found, no session
        cookie or captcha code is available, or the POST fails with
        ``HTTPError``.  A rejected captcha triggers a recursive retry.

    Raises:
        ResolveException: when the server answers the POST with the form
            page again (carrying the alert text if one is present).
    """
    item = item.copy()
    util.init_urllib()
    url = self._url(item['url'])
    try:
        opener = OpenerDirector()
        opener.add_handler(HTTPHandler())
        opener.add_handler(UnknownHandler())
        install_opener(opener)
        request = Request(url)
        request.add_header('User-Agent', util.UA)
        response = urlopen(request)
        page = response.read()
        response.close()
    except HTTPError:
        traceback.print_exc()
        return None

    flags = re.IGNORECASE | re.DOTALL
    data = util.substr(page, '<form method=post target="iframe_dwn"', '</form>')
    action = re.search(r'action=(?P<url>[^>]+)', data, flags)
    img = re.search(r'<img src="(?P<url>[^"]+)', data, flags)
    if not (img and action):
        return None

    # The header may be absent; re.finditer() would raise TypeError on None.
    cookie_header = response.headers.get('Set-Cookie') or ''
    sessid = [match.group(1)
              for match in re.finditer(r'(PHPSESSID=[^\;]+)', cookie_header, flags)]
    if not sessid:
        # Without a session cookie the captcha cannot be tied to the form.
        self.error('no PHPSESSID cookie received, cannot request captcha')
        return None
    session_cookie = sessid[-1]

    # we have to download image ourselves
    image = util.request(self._url(img.group('url')),
                         headers={'Referer': url, 'Cookie': session_cookie})
    img_file = os.path.join(self.tmp_dir, 'captcha.png')
    util.save_data_to_file(image, img_file)

    code = None
    if captcha_cb:
        code = captcha_cb({'id': '0', 'img': img_file})
    if not code:
        self.info('No captcha received, exit')
        return None

    form_data = urllib.urlencode({'code': code})
    req = Request(self._url(action.group('url')), form_data)
    req.add_header('User-Agent', util.UA)
    req.add_header('Referer', url)
    req.add_header('Cookie', session_cookie)
    try:
        resp = urlopen(req)
        if resp.code == 302:
            file_url = resp.headers.get('location')
        else:
            file_url = resp.geturl()
        if file_url.find(action.group('url')) > 0:
            # Still on the form's action URL: the server refused the request.
            msg = resp.read()
            resp.close()
            js_msg = re.search(r"alert\('(?P<msg>[^']+)", msg, flags)
            if js_msg:
                raise ResolveException(js_msg.group('msg'))
            self.error(msg)
            raise ResolveException('Nelze ziskat soubor, zkuste to znovu')
        resp.close()
        if file_url.find('data') >= 0 or file_url.find('download_free') > 0:
            item['url'] = file_url
            return item
        self.error('wrong captcha, retrying')
        return self.resolve(item, captcha_cb, select_cb)
    except HTTPError:
        traceback.print_exc()
        return None
auth_token = self._get_outh2_token(www_authenticate_header) request.add_unredirected_header('Authorization', 'Bearer ' + auth_token) return self.parent.open(request, timeout=request.timeout) # Got some help from this example https://gist.github.com/FiloSottile/2077115 class HeadRequest(Request): def get_method(self): return "HEAD" better_urllib_get = OpenerDirector() better_urllib_get.addheaders = DEFAULT_HEADERS.copy() better_urllib_get.add_handler(HTTPHandler()) better_urllib_get.add_handler(HTTPSHandler()) better_urllib_get.add_handler(HTTPRedirectHandler()) better_urllib_get.add_handler(SocketFileHandler()) better_urllib_get.add_handler(Oauth2TokenAuthHandler()) class RegistryError(Exception): def __init__(self, response): self.response_obj = response # Util functions ############################################################################################# def parse_thresholds(spec, include_units=True, units_required=True): """
# Download the source tarball of the latest RocksDB release into the directory
# given as the only command-line argument, and print the version to stdout.
import sys
from urllib.request import OpenerDirector, HTTPRedirectHandler, HTTPSHandler, urlretrieve

REPO_URL = 'https://github.com/facebook/rocksdb'

# Validate the command line explicitly: assert statements are removed when
# Python runs with -O, which would let bad invocations through.
if len(sys.argv) < 2:
    sys.exit('Please provide a download directory, e.g. /build')
if len(sys.argv) > 2:
    sys.exit(f'Please omit the unexpected arguments: {sys.argv[2:]}')
download_dir = sys.argv[1]

od = OpenerDirector()
od.add_handler(HTTPSHandler())
od.add_handler(HTTPRedirectHandler())

# The release tag is the last path component of the Location header returned
# for the "latest" download URL.
resp = od.open(f'{REPO_URL}/releases/latest/download/')
location = resp.headers['location']
if location is None:
    sys.exit(f'No Location header in the response from {REPO_URL}; '
             'cannot determine the latest release tag')
tag_name = location.split('/')[-1]

release_url = f'{REPO_URL}/archive/{tag_name}.tar.gz'
file_path, headers = urlretrieve(release_url, f'{download_dir}/latest.tar.gz')
print(f'RocksDB {tag_name} was downloaded to {file_path}', file=sys.stderr)
# Drop the first character of the tag (presumably a leading "v" - confirm).
print(tag_name[1:])
def connect(self): self.sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM, proto=0, fileno=None) self.sock.settimeout(self.timeout) self.sock.connect(self.socket_file) def socket_open(self, req): socket_file, path = req.selector.split(':', 1) req.host = socket_file req.selector = path return self.do_open(self.SocketFileToHttpConnectionAdaptor, req) better_urllib_get = OpenerDirector() better_urllib_get.addheaders = DEFAULT_HEADERS.copy() better_urllib_get.add_handler(HTTPHandler()) better_urllib_get.add_handler(HTTPSHandler()) better_urllib_get.add_handler(SocketFileHandler()) # Util functions ############################################################################################# @lru_cache() def get_url(url): response = better_urllib_get.open(url, timeout=timeout) return process_urllib_response(response), response.status def process_urllib_response(response):