def _set_request_headers(self, request, headers):
    """
    Copy ``headers`` onto ``request`` as raw HTTP headers.

    ``headers`` may be a dict, an iterable of ``(name, value)`` pairs, or
    a falsy value, in which case nothing is set. Every name and value is
    passed through ``to_bytes`` before ``request.setRawHeader`` is called.
    """
    pairs = headers.items() if isinstance(headers, dict) else headers
    if not pairs:
        return

    for header_name, header_value in pairs:
        request.setRawHeader(to_bytes(header_name), to_bytes(header_value))
def _handle_custom_headers(self, request):
    """
    Apply the ``custom_headers`` webpage attribute to ``request``.

    When the ``skip_custom_headers`` webpage attribute is set, it is reset
    to ``False`` and no headers are applied. Otherwise each header is
    written with ``request.setRawHeader``; int/float values are converted
    to ``str`` first. A header that raises ``TypeError`` is logged and
    skipped, and the remaining headers are still processed.
    """
    if self._get_webpage_attribute(request, "skip_custom_headers"):
        # XXX: this hack assumes that new requests between
        # BrowserTab._create_request and this function are not possible,
        # i.e. we don't give control to the event loop in between.
        # Unfortunately we can't store this flag on a request itself
        # because a new QNetworkRequest instance is created by QWebKit.
        self._set_webpage_attribute(request, "skip_custom_headers", False)
        return

    custom = self._get_webpage_attribute(request, "custom_headers")
    pairs = custom.items() if isinstance(custom, dict) else custom

    for name, value in pairs or ():
        try:
            # Numbers are accepted for convenience and sent as their
            # string representation.
            if isinstance(value, (int, float)):
                value = str(value)
            request.setRawHeader(to_bytes(name), to_bytes(value))
        except TypeError:
            template = "invalid header {!r}: {!r}. Header keys and values must be strings or bytes"
            self.log(template.format(name, value), min_level=1, format_msg=False)
def _set_request_headers(self, request, headers):
    """
    Set HTTP headers for the request.

    ``headers`` may be a dict, an iterable of ``(name, value)`` pairs, or
    a falsy value, in which case nothing is set. Every name and value is
    passed through ``to_bytes`` before ``request.setRawHeader`` is called.

    If one of the headers is ``User-Agent`` (matched case-insensitively),
    its original, unconverted value is also passed to
    ``self.set_user_agent``.
    """
    if isinstance(headers, dict):
        headers = headers.items()

    for name, value in headers or []:
        raw_name = to_bytes(name)
        request.setRawHeader(raw_name, to_bytes(value))
        # Compare the normalized bytes name: header names may be given as
        # bytes, and in Python 3 a bytes object never compares equal to
        # the str 'user-agent', so a str comparison would silently skip
        # a bytes ``User-Agent`` header.
        if raw_name.lower() == b'user-agent':
            self.set_user_agent(value)
def _get_header_value(headers, name, default=None):
    """
    Look up a header by name, ignoring case.

    ``headers`` may be a dict or an iterable of ``(name, value)`` pairs.
    Both the requested name and each candidate name are lower-cased and
    passed through ``to_bytes`` before comparison. The value of the first
    match is returned as-is. ``default`` is returned when ``headers`` is
    falsy or nothing matches.
    """
    if not headers:
        return default

    pairs = headers.items() if isinstance(headers, dict) else headers
    wanted = to_bytes(name.lower())
    return next(
        (value for key, value in pairs if wanted == to_bytes(key.lower())),
        default,
    )
def _send_request(self, url, callback, method='GET', body=None, headers=None):
    """
    Send a GET or POST request through ``self.network_manager``.

    The request is built with ``self.request_obj``. Its ``user-agent``
    header is taken from ``headers`` when a non-empty one is present,
    otherwise from ``self.web_page.userAgentForUrl``. ``callback`` is
    connected to the reply's ``finished`` signal, the reply is added to
    ``self._replies``, and the reply is returned.

    Raises ``NotImplementedError`` for any method other than GET/POST.
    ``body``, when given, must already be ``bytes``.
    """
    # this is called when request is NOT downloaded via webpage.mainFrame()
    # XXX: The caller must ensure self._delete_reply is called in a callback.
    verb = method.upper()
    if verb not in ("POST", "GET"):
        raise NotImplementedError()

    if body is not None:
        assert isinstance(body, bytes)

    request = self.request_obj(url, headers=headers, body=body)

    # setting UA for request that is not downloaded via
    # webpage.mainFrame().load_to_mainframe()
    header_ua = _get_header_value(headers, b'user-agent')
    page_ua = self.web_page.userAgentForUrl(to_qurl(url))
    request.setRawHeader(b"user-agent", to_bytes(header_ua or page_ua))

    if verb == "GET":
        reply = self.network_manager.get(request)
    else:
        reply = self.network_manager.post(request, body)

    reply.finished.connect(callback)
    self._replies.add(reply)
    return reply
def __init__(self, pool, ui_enabled, lua_enabled,
             lua_sandbox_enabled,
             lua_package_path,
             lua_sandbox_allowed_modules,
             max_timeout,
             argument_cache_max_entries,
             strict_lua_runner,
             ):
    """
    Build the root resource and register all of its child resources.

    The ``render.*`` endpoints, ``_debug``, ``_gc``, ``_ping`` and the
    legacy ``debug`` endpoint are always registered. ``execute`` and
    ``run`` are added only when Lua is enabled and
    ``ExecuteLuaScriptResource`` is available. The ``_ui`` static files
    and the demo UI are added only when the UI is enabled.
    """
    Resource.__init__(self)
    self.argument_cache = ArgumentCache(argument_cache_max_entries)
    self.ui_enabled = ui_enabled
    self.lua_enabled = lua_enabled

    # All render endpoints share the same constructor arguments.
    render_args = (pool, max_timeout, self.argument_cache)
    render_endpoints = (
        (b"render.html", RenderHtmlResource),
        (b"render.png", RenderPngResource),
        (b"render.jpeg", RenderJpegResource),
        (b"render.json", RenderJsonResource),
        (b"render.har", RenderHarResource),
    )
    for path, resource_cls in render_endpoints:
        self.putChild(path, resource_cls(*render_args))

    self.putChild(b"_debug", DebugResource(pool, self.argument_cache))
    self.putChild(b"_gc", ClearCachesResource(self.argument_cache))
    self.putChild(b"_ping", PingResource())

    # backwards compatibility
    self.putChild(b"debug", DebugResource(pool, self.argument_cache, warn=True))

    if self.lua_enabled and ExecuteLuaScriptResource is not None:
        lua_kwargs = {
            'pool': pool,
            'sandboxed': lua_sandbox_enabled,
            'lua_package_path': lua_package_path,
            'lua_sandbox_allowed_modules': lua_sandbox_allowed_modules,
            'max_timeout': max_timeout,
            'argument_cache': self.argument_cache,
            'strict': strict_lua_runner,
        }
        # "execute" and "run" differ only in the implicit_main flag.
        for path, implicit_main in ((b"execute", False), (b"run", True)):
            self.putChild(path, ExecuteLuaScriptResource(
                implicit_main=implicit_main, **lua_kwargs))

    if self.ui_enabled:
        root = os.path.dirname(__file__)
        ui = File(os.path.join(root, 'ui'))

        # Static directories served below "_ui", given relative to the
        # directory of this module.
        static_dirs = (
            (to_bytes(HARVIEWER_PATH), ('vendor', 'harviewer', 'webapp')),
            (b"inspections", ('kernel', 'inspections')),
            (b"examples", ('examples',)),
        )
        for path, parts in static_dirs:
            ui.putChild(path, File(os.path.join(root, *parts)))

        self.putChild(b"_ui", ui)
        self.putChild(DemoUI.PATH, DemoUI(
            pool=pool,
            lua_enabled=self.lua_enabled,
            max_timeout=max_timeout
        ))
    self.max_timeout = max_timeout
def _handle_custom_headers(self, request):
    """
    Apply the ``custom_headers`` webpage attribute to ``request``.

    When the ``skip_custom_headers`` webpage attribute is set, it is reset
    to ``False`` and no headers are applied. Otherwise every header name
    and value is passed through ``to_bytes`` and written with
    ``request.setRawHeader``.
    """
    skip = self._get_webpage_attribute(request, "skip_custom_headers")
    if skip:
        # XXX: this hack assumes that new requests between
        # BrowserTab._create_request and this function are not possible,
        # i.e. we don't give control to the event loop in between.
        # Unfortunately we can't store this flag on a request itself
        # because a new QNetworkRequest instance is created by QWebKit.
        self._set_webpage_attribute(request, "skip_custom_headers", False)
        return

    custom = self._get_webpage_attribute(request, "custom_headers")
    if isinstance(custom, dict):
        custom = custom.items()
    if not custom:
        return

    for header_name, header_value in custom:
        request.setRawHeader(to_bytes(header_name), to_bytes(header_value))
def http_post(self, url, callback, headers=None, follow_redirects=True,
              body=None):
    """
    Send a POST request via ``self.http_client``.

    ``body`` is passed through ``to_bytes`` unless it is ``None``. All
    other arguments are forwarded unchanged to ``self.http_client.post``.
    """
    payload = None if body is None else to_bytes(body)
    self.http_client.post(
        url,
        callback=callback,
        headers=headers,
        follow_redirects=follow_redirects,
        body=payload,
    )
def __init__(self, pool, ui_enabled, lua_enabled,
             lua_sandbox_enabled,
             lua_package_path,
             lua_sandbox_allowed_modules,
             max_timeout,
             argument_cache_max_entries,
             strict_lua_runner,
             ):
    """
    Build the root resource and register all of its child resources.

    The ``render.*`` endpoints, ``_debug``, ``_gc``, ``_ping`` and the
    legacy ``debug`` endpoint are always registered. ``execute`` is added
    only when Lua is enabled and ``ExecuteLuaScriptResource`` is
    available. The ``_ui`` static files and the demo UI are added only
    when the UI is enabled.
    """
    Resource.__init__(self)
    self.argument_cache = ArgumentCache(argument_cache_max_entries)
    self.ui_enabled = ui_enabled
    self.lua_enabled = lua_enabled

    # All render endpoints share the same constructor arguments.
    render_args = (pool, max_timeout, self.argument_cache)
    render_endpoints = (
        (b"render.html", RenderHtmlResource),
        (b"render.png", RenderPngResource),
        (b"render.jpeg", RenderJpegResource),
        (b"render.json", RenderJsonResource),
        (b"render.har", RenderHarResource),
    )
    for path, resource_cls in render_endpoints:
        self.putChild(path, resource_cls(*render_args))

    self.putChild(b"_debug", DebugResource(pool, self.argument_cache))
    self.putChild(b"_gc", ClearCachesResource(self.argument_cache))
    self.putChild(b"_ping", PingResource())

    # backwards compatibility
    self.putChild(b"debug", DebugResource(pool, self.argument_cache, warn=True))

    if self.lua_enabled and ExecuteLuaScriptResource is not None:
        execute_resource = ExecuteLuaScriptResource(
            pool=pool,
            sandboxed=lua_sandbox_enabled,
            lua_package_path=lua_package_path,
            lua_sandbox_allowed_modules=lua_sandbox_allowed_modules,
            max_timeout=max_timeout,
            argument_cache=self.argument_cache,
            strict=strict_lua_runner,
        )
        self.putChild(b"execute", execute_resource)

    if self.ui_enabled:
        root = os.path.dirname(__file__)
        ui = File(os.path.join(root, 'ui'))

        # Static directories served below "_ui", given relative to the
        # directory of this module.
        static_dirs = (
            (to_bytes(HARVIEWER_PATH), ('vendor', 'harviewer', 'webapp')),
            (b"inspections", ('kernel', 'inspections')),
            (b"examples", ('examples',)),
        )
        for path, parts in static_dirs:
            ui.putChild(path, File(os.path.join(root, *parts)))

        self.putChild(b"_ui", ui)
        self.putChild(DemoUI.PATH, DemoUI(
            pool=pool,
            lua_enabled=self.lua_enabled,
            max_timeout=max_timeout
        ))
    self.max_timeout = max_timeout
def go(self, url, callback, errback, baseurl=None, http_method='GET',
       body=None, headers=None):
    """
    Go to an URL. This is similar to entering an URL in
    address tab and pressing Enter.

    Without ``baseurl`` the URL is loaded directly into the main frame
    and ``callback``/``errback`` are connected to ``self._load_finished``.
    With ``baseurl`` the page is first fetched through
    ``self.http_client`` and handed to
    ``self._on_baseurl_request_finished``.
    """
    self.store_har_timing("_onStarted")

    if body is not None:
        body = to_bytes(body)

    if not baseurl:
        # NOTE: only a single concurrent 'go' request is really supported.
        # A warning about cancelling previous go requests used to be
        # considered here:
        #
        # if not self._goto_callbacks.isempty():
        #     self.logger.log("Only a single concurrent 'go' request is supported. "
        #                     "Previous go requests will be cancelled.", min_level=1)
        #     # When a new URL is loaded to mainFrame an errback will
        #     # be called, so we're not cancelling this callback manually.
        callback_id = self._load_finished.connect(
            self._on_content_ready,
            callback=callback,
            errback=errback,
        )
        self.logger.log(
            "callback %s is connected to loadFinished" % callback_id,
            min_level=3,
        )
        self._load_url_to_mainframe(url, http_method, body, headers=headers)
        return

    # If baseurl is used, we download the page manually,
    # then set its contents to the QWebPage and let it
    # download related resources and render the result.
    on_finished = functools.partial(
        self._on_baseurl_request_finished,
        callback=callback,
        errback=errback,
        baseurl=baseurl,
        url=url,
    )
    self.http_client.request(
        url,
        callback=on_finished,
        method=http_method,
        body=body,
        headers=headers,
        follow_redirects=True,
    )
def _get_entrypoint(lua, script):
    """
    Run ``script`` in the ``lua`` runtime, then return whatever the
    global name ``main`` evaluates to.

    >>> import lupa; lua = lupa.LuaRuntime()
    >>> main = _get_entrypoint(lua, "x=1; function main() return 55 end")
    >>> main()
    55
    """
    source = to_bytes(script)
    lua.execute(source)
    entrypoint = lua.eval("main")
    return entrypoint
def har_cookie2qt(cls, cookie):
    """
    Build a ``QNetworkCookie`` from a cookie dict.

    ``name`` and ``value`` are required keys. ``domain``, ``httpOnly``,
    ``secure`` and ``path`` are applied only when present. A truthy
    ``expires`` value is parsed with ``QDateTime.fromString`` using
    ``Qt.ISODate`` and set as the expiration date.
    """
    qcookie = QNetworkCookie()
    qcookie.setName(to_bytes(cookie["name"]))
    qcookie.setValue(to_bytes(cookie["value"]))

    # Optional attributes: applied in this order, only when the key exists.
    optional_setters = (
        ('domain', qcookie.setDomain),
        ('httpOnly', qcookie.setHttpOnly),
        ('secure', qcookie.setSecure),
        ('path', qcookie.setPath),
    )
    for key, setter in optional_setters:
        if key in cookie:
            setter(cookie[key])

    expires_raw = cookie.get('expires')
    if expires_raw:
        qcookie.setExpirationDate(QDateTime.fromString(expires_raw, Qt.ISODate))

    return qcookie
def test_b64_encode(self):
    """
    The Lua ``base64.encode`` result must equal Python's
    ``base64.b64encode`` of the same text, for ASCII, non-ASCII and
    empty input.
    """
    script = """ b64 = require('base64') function main(splash) return {res=b64.encode(splash.args.txt)} end """
    for sample in ["hello", u"привет", ""]:
        resp = self.request_lua(script, {'txt': sample})
        self.assertStatusCode(resp, 200)

        expected = to_unicode(base64.b64encode(to_bytes(sample)))
        self.assertEqual(resp.json(), {'res': expected})
def test_b64_encode(self):
    """
    Encode several strings with the Lua ``base64`` module and compare
    each response against ``base64.b64encode`` computed in Python.
    """
    lua_script = """ b64 = require('base64') function main(splash) return {res=b64.encode(splash.args.txt)} end """
    samples = ["hello", u"привет", ""]

    for text in samples:
        response = self.request_lua(lua_script, {'txt': text})
        self.assertStatusCode(response, 200)

        raw = to_bytes(text)
        encoded = to_unicode(base64.b64encode(raw))
        self.assertEqual(response.json(), {'res': encoded})
def go(self, url, callback, errback, baseurl=None, http_method='GET',
       body=None, headers=None):
    """
    Go to an URL. This is similar to entering an URL in
    address tab and pressing Enter.

    A ``User-Agent`` found in ``headers`` is also applied through
    ``self.set_user_agent``. Without ``baseurl`` the URL is loaded
    directly into the main frame and ``callback``/``errback`` are
    connected to ``self._load_finished``. With ``baseurl`` the page is
    first fetched through ``self.http_client`` and handed to
    ``self._on_baseurl_request_finished``.
    """
    self.store_har_timing("_onStarted")

    if body is not None:
        body = to_bytes(body)

    user_agent = _get_header_value(headers, b"user-agent")
    if user_agent:
        # User passed User-Agent header to go() so we need to set
        # consistent UA for all rendering requests.
        # Passing UA header to go() will have same effect as
        # splash:set_user_agent().
        self.set_user_agent(user_agent)

    if not baseurl:
        # NOTE: only a single concurrent 'go' request is really supported.
        # A warning about cancelling previous go requests used to be
        # considered here:
        #
        # if not self._goto_callbacks.isempty():
        #     self.logger.log("Only a single concurrent 'go' request is supported. "
        #                     "Previous go requests will be cancelled.", min_level=1)
        #     # When a new URL is loaded to mainFrame an errback will
        #     # be called, so we're not cancelling this callback manually.
        callback_id = self._load_finished.connect(
            self._on_content_ready,
            callback=callback,
            errback=errback,
        )
        self.logger.log(
            "callback %s is connected to loadFinished" % callback_id,
            min_level=3,
        )
        self._load_url_to_mainframe(url, http_method, body, headers=headers)
        return

    # If baseurl is used, we download the page manually,
    # then set its contents to the QWebPage and let it
    # download related resources and render the result.
    on_finished = functools.partial(
        self._on_baseurl_request_finished,
        callback=callback,
        errback=errback,
        baseurl=baseurl,
        url=url,
    )
    self.http_client.request(
        url,
        callback=on_finished,
        method=http_method,
        body=body,
        headers=headers,
        follow_redirects=True,
    )
def run_in_sandbox(lua, script):
    """
    Execute ``script`` in ``lua`` runtime using "sandbox" Lua module.
    Return a (sandboxed) global environment for the executed script.

    "sandbox" module should be importable in the environment.
    It should provide ``sandbox.run(untrusted_code)`` method and
    ``sandbox.env`` table with a global environment.
    See ``splash/lua_modules/sandbox.lua``.

    Any result of ``sandbox.run`` other than ``True`` is unpacked as a
    two-item sequence, and ``lupa.LuaError`` is raised with the second
    item.
    """
    sandbox = lua.eval("require('sandbox')")
    outcome = sandbox.run(to_bytes(script))
    if outcome is True:
        return sandbox.env

    _ok, error = outcome
    raise lupa.LuaError(error)
def tokenize(self, lua_source, pad=1):
    """
    Tokenize ``lua_source`` and return a list of ``Token`` objects,
    preceded by ``pad`` placeholder ``Token("NA", "")`` entries.
    """
    # Our lexer doesn't support unicode. To avoid exceptions,
    # replace all non-ascii characters before the tokenization.
    # This is not optimal, but Lua doesn't allow unicode identifiers,
    # so non-ascii text usually is not interesting for the completion
    # engine.
    ascii_source = to_bytes(lua_source, 'ascii', 'replace')
    raw_tokens = self._completer.tokenize(ascii_source)

    result = [Token("NA", "")] * pad
    for raw in raw_tokens.values():
        token_type = self.lua.lua2python(raw[b"tp"], encoding='utf8')
        token_value = self.lua.lua2python(raw[b"value"], encoding='utf8')
        result.append(Token(token_type, token_value))
    return result
def p2l(obj, depth):
    """
    Recursively convert ``obj`` to a Lua-friendly value.

    ``depth`` is the remaining nesting budget; ``ValueError`` is raised
    once it reaches zero. ``PyResult`` objects and (when ``keep_tuples``
    is set) tuples become tuples of converted items, dicts become Lua
    tables, other lists/tuples become Lua tables marked as arrays,
    strings are encoded with ``encoding``, and datetimes become their
    ISO format plus ``'Z'`` as bytes. Anything else is returned unchanged.
    """
    if depth <= 0:
        raise ValueError(
            "Can't convert Python object to Lua: depth limit is reached")

    remaining = depth - 1

    if isinstance(obj, PyResult):
        return tuple(p2l(item, remaining) for item in obj.result)

    if isinstance(obj, dict):
        converted = {
            p2l(key, remaining): p2l(value, remaining)
            for key, value in obj.items()
        }
        return lua.table_from(converted)

    if keep_tuples and isinstance(obj, tuple):
        return tuple(p2l(item, remaining) for item in obj)

    if isinstance(obj, (list, tuple)):
        items = [p2l(item, remaining) for item in obj]
        return _mark_table_as_array(lua, lua.table_from(items))

    if isinstance(obj, str):
        return obj.encode(encoding)

    if isinstance(obj, datetime.datetime):
        return to_bytes(obj.isoformat() + 'Z', encoding)
        # XXX: maybe return datetime encoded to Lua standard? E.g.:

        # tm = obj.timetuple()
        # return python2lua(lua, {
        #     '_jstype': 'Date',
        #     'year': tm.tm_year,
        #     'month': tm.tm_mon,
        #     'day': tm.tm_mday,
        #     'yday': tm.tm_yday,
        #     'wday': tm.tm_wday,  # fixme: in Lua Sunday is 1, in Python Monday is 0
        #     'hour': tm.tm_hour,
        #     'min': tm.tm_min,
        #     'sec': tm.tm_sec,
        #     'isdst': tm.tm_isdst,  # fixme: isdst can be -1 in Python
        # }, max_depth)

    return obj
def p2l(obj, depth):
    """
    Recursively convert ``obj`` to a Lua-friendly value.

    ``depth`` is the remaining nesting budget; ``ValueError`` is raised
    once it reaches zero. ``PyResult`` objects and (when ``keep_tuples``
    is set) tuples become tuples of converted items, dicts become Lua
    tables, other lists/tuples become Lua tables marked as arrays, text
    strings are encoded with ``encoding``, and datetimes become their
    ISO format plus ``'Z'`` as bytes. Anything else is returned unchanged.
    """
    if depth <= 0:
        raise ValueError("Can't convert Python object to Lua: depth limit is reached")

    remaining = depth - 1

    if isinstance(obj, PyResult):
        return tuple(p2l(item, remaining) for item in obj.result)

    if isinstance(obj, dict):
        converted = {
            p2l(key, remaining): p2l(value, remaining)
            for key, value in six.iteritems(obj)
        }
        return lua.table_from(converted)

    if keep_tuples and isinstance(obj, tuple):
        return tuple(p2l(item, remaining) for item in obj)

    if isinstance(obj, (list, tuple)):
        items = [p2l(item, remaining) for item in obj]
        return _mark_table_as_array(lua, lua.table_from(items))

    if isinstance(obj, six.text_type):
        return obj.encode(encoding)

    if isinstance(obj, datetime.datetime):
        return to_bytes(obj.isoformat() + 'Z', encoding)
        # XXX: maybe return datetime encoded to Lua standard? E.g.:

        # tm = obj.timetuple()
        # return python2lua(lua, {
        #     '_jstype': 'Date',
        #     'year': tm.tm_year,
        #     'month': tm.tm_mon,
        #     'day': tm.tm_mday,
        #     'yday': tm.tm_yday,
        #     'wday': tm.tm_wday,  # fixme: in Lua Sunday is 1, in Python Monday is 0
        #     'hour': tm.tm_hour,
        #     'min': tm.tm_min,
        #     'sec': tm.tm_sec,
        #     'isdst': tm.tm_isdst,  # fixme: isdst can be -1 in Python
        # }, max_depth)

    return obj
def test_errors_argument(self):
    """With ``errors='replace'``, U+FFFD is expected to encode to ``?`` in latin-1."""
    encoded = to_bytes(u'a\ufffdb', 'latin-1', errors='replace')
    self.assertEqual(encoded, b'a?b')
def test_converting_a_unicode_object_to_a_latin_1_encoded_string(self):
    """A text string converted with the latin-1 encoding yields latin-1 bytes."""
    encoded = to_bytes(u'\xa3 49', 'latin-1')
    self.assertEqual(encoded, b'\xa3 49')
def test_converting_a_regular_bytes_to_bytes_should_return_the_same_object(self):
    """Bytes input must come back from ``to_bytes`` with equal content."""
    original = b'lel\xf1e'
    self.assertEqual(to_bytes(original), b'lel\xf1e')
def test_errors_argument(self):
    """Check that the ``errors`` argument is honoured by ``to_bytes``."""
    expected = b'a?b'
    actual = to_bytes(u'a\ufffdb', 'latin-1', errors='replace')
    self.assertEqual(actual, expected)
def test_converting_a_regular_bytes_to_bytes_should_return_the_same_object(
        self):
    """``to_bytes`` applied to bytes must return bytes with equal content."""
    expected = b'lel\xf1e'
    result = to_bytes(b'lel\xf1e')
    self.assertEqual(result, expected)
def set_header(self, name, value):
    """
    Set a raw header on ``self.request``; ``name`` and ``value`` are
    both passed through ``to_bytes`` first.
    """
    raw_name = to_bytes(name)
    raw_value = to_bytes(value)
    self.request.setRawHeader(raw_name, raw_value)
def to_qurl(s):
    """
    Return ``s`` as a ``QUrl``.

    A ``QUrl`` instance is returned unchanged. Anything else is converted
    to UTF-8 bytes with ``to_bytes`` and parsed with ``QUrl.fromEncoded``.
    """
    if not isinstance(s, QUrl):
        encoded = to_bytes(s, encoding='utf8')
        return QUrl.fromEncoded(encoded)
    return s
def base64_encode(self, data):
    """
    Return ``data`` encoded as base64.

    A ``BinaryCapsule`` is encoded by its own ``as_b64()`` method. Any
    other value is passed through ``to_bytes`` and encoded with
    ``base64.b64encode``.
    """
    if not isinstance(data, BinaryCapsule):
        raw = to_bytes(data)
        return base64.b64encode(raw)
    return data.as_b64()
def test_converting_a_unicode_object_to_an_utf_8_encoded_string(self):
    """Without an explicit encoding, a text string is expected to encode as UTF-8."""
    encoded = to_bytes(u'\xa3 49')
    self.assertEqual(encoded, b'\xc2\xa3 49')