def render(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    input_table: ArrowTable,
    params: Params,
    tab: Tab,
    fetch_result: Optional[FetchResult],
    output_filename: str,
) -> RenderResult:
    """Run the module's `render_thrift()` function and return its result.

    The module is handed `basedir` re-rooted at "/" (relative to the chroot
    root). The child call runs inside
    `chroot_context.writable_file(basedir / output_filename)`, and
    `chroot_context.clear_unowned_edits()` is always called afterwards,
    whether or not the child call succeeded.

    Raise ModuleError if the module has a bug.

    Raise ModuleExitedError if the module reports a table filename other
    than `output_filename`, or if its result fails validation (in which
    case the ValidateError is chained as the cause).
    """
    chroot_dir = chroot_context.chroot.root
    # Path of `basedir` as the module sees it: re-rooted at "/".
    basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
    request = ttypes.RenderRequest(
        str(basedir_seen_by_module),
        arrow_arrow_table_to_thrift(input_table),
        arrow_params_to_thrift(params),
        arrow_tab_to_thrift(tab),
        (
            None
            if fetch_result is None
            else arrow_fetch_result_to_thrift(fetch_result)
        ),
        output_filename,
    )
    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=pyspawner.NetworkConfig(),  # TODO disallow networking
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
    finally:
        chroot_context.clear_unowned_edits()

    # An empty filename is accepted; any other name must be the one we asked for.
    if result.table.filename and result.table.filename != output_filename:
        raise ModuleExitedError(
            compiled_module.module_slug, 0, "Module wrote to wrong output file"
        )

    try:
        # thrift_render_result_to_arrow() verifies all filenames passed by
        # the module are in the directory the module has access to. It
        # assumes the Arrow file (if there is one) is untrusted, so it can
        # raise ValidateError
        render_result = thrift_render_result_to_arrow(result, basedir)
    except ValidateError as err:
        # Chain explicitly so the validation failure is kept as __cause__.
        raise ModuleExitedError(
            compiled_module.module_slug,
            0,
            "Module produced invalid data: %s" % str(err),
        ) from err
    return render_result
def render(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    input_filename: str,
    params: Dict[str, Any],
    tab_name: str,
    fetch_result: Optional[FetchResult],
    tab_outputs: Dict[str, TabOutput],
    uploaded_files: Dict[str, UploadedFile],
    output_filename: str,
) -> RenderResult:
    """Run the module's `render_thrift()` function and return its result.

    The module is handed `basedir` re-rooted at "/" (relative to the chroot
    root). The child call runs inside
    `chroot_context.writable_file(basedir / output_filename)`, and
    `chroot_context.clear_unowned_edits()` is always called afterwards,
    whether or not the child call succeeded.

    `tab_outputs` and `uploaded_files` are mappings: each value is converted
    to its Thrift form and the keys are passed through unchanged.
    NOTE(review): `tab_outputs` keys are presumed to be `str`, like
    `uploaded_files` -- confirm against callers.

    Networking is only enabled for the module slugs "pythoncode" and
    "ACS2016"; every other module gets `network_config=None`.

    Raise ModuleError if the module has a bug.
    """
    chroot_dir = chroot_context.chroot.root
    # Path of `basedir` as the module sees it: re-rooted at "/".
    basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
    request = ttypes.RenderRequest(
        basedir=str(basedir_seen_by_module),
        params=pydict_to_thrift_json_object(params),
        tab_name=tab_name,
        tab_outputs={
            k: arrow_tab_output_to_thrift(v) for k, v in tab_outputs.items()
        },
        uploaded_files={
            k: arrow_uploaded_file_to_thrift(v) for k, v in uploaded_files.items()
        },
        fetch_result=(
            None
            if fetch_result is None
            else arrow_fetch_result_to_thrift(fetch_result)
        ),
        output_filename=output_filename,
        input_filename=input_filename,
    )
    if compiled_module.module_slug in {"pythoncode", "ACS2016"}:
        # TODO disallow networking; make network_config always None
        network_config = pyspawner.NetworkConfig()
    else:
        network_config = None

    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=network_config,
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
    finally:
        chroot_context.clear_unowned_edits()

    return thrift_render_result_to_arrow(result)
def fetch(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    params: Dict[str, Any],
    secrets: Dict[str, Any],
    last_fetch_result: Optional[FetchResult],
    input_parquet_filename: Optional[str],
    output_filename: str,
) -> FetchResult:
    """Invoke the module's `fetch_thrift()` function and return what it produced.

    The module is handed `basedir` re-rooted at "/" (relative to the chroot
    root). The child call runs inside
    `chroot_context.writable_file(basedir / output_filename)`, and
    `chroot_context.clear_unowned_edits()` is always called afterwards.

    Raise ModuleError if the module has a bug.

    Raise ModuleExitedError if the module reports a filename other than
    `output_filename`.
    """
    root = chroot_context.chroot.root
    # Where `basedir` lives from the module's point of view.
    module_basedir = Path("/") / basedir.relative_to(root)

    # Convert every request field up front, in the order the request lists them.
    thrift_basedir = str(module_basedir)
    thrift_params = pydict_to_thrift_json_object(params)
    thrift_secrets = pydict_to_thrift_json_object(secrets)
    if last_fetch_result is None:
        thrift_last_fetch_result = None
    else:
        thrift_last_fetch_result = arrow_fetch_result_to_thrift(last_fetch_result)

    request = ttypes.FetchRequest(
        basedir=thrift_basedir,
        params=thrift_params,
        secrets=thrift_secrets,
        last_fetch_result=thrift_last_fetch_result,
        input_table_parquet_filename=input_parquet_filename,
        output_filename=output_filename,
    )

    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=root,
                network_config=pyspawner.NetworkConfig(),
                compiled_module=compiled_module,
                timeout=self.fetch_timeout,
                result=ttypes.FetchResult(),
                function="fetch_thrift",
                args=[request],
            )
    finally:
        chroot_context.clear_unowned_edits()

    # An empty filename is accepted; any other name must be the one we asked for.
    reported_filename = result.filename
    if reported_filename and reported_filename != output_filename:
        raise ModuleExitedError(
            compiled_module.module_slug, 0, "Module wrote to wrong output file"
        )

    # TODO validate result isn't too large. If result is dataframe it makes
    # sense to truncate; but fetch results aren't necessarily data frames.
    # It's up to the module to enforce this logic ... but we need to set a
    # maximum file size.
    return thrift_fetch_result_to_arrow(result, basedir)
# SECURITY test: a module must not be able to reach a private-network service.
# Passes a Python snippet to `_spawn_and_communicate_or_raise` (with
# `self._client` and `network_config=pyspawner.NetworkConfig()`); presumably the
# snippet is executed in the spawned child -- confirm against the helper.
# The snippet opens a TCP socket to 10.2.3.4:5432 and only passes when connect()
# raises OSError with errno ECONNREFUSED; a successful connect hits `assert False`.
# NOTE(review): the snippet is whitespace-sensitive Python source inside a raw
# string, interpolated with `%r` -- keep its line breaks and indentation intact.
def test_SECURITY_private_network_is_off_limits(self): # The module cannot access a service on the private network. # Try to connect to Postgres -- we know it's there. postgres_ip = "10.2.3.4" port = 5432 _spawn_and_communicate_or_raise( self._client, r""" import errno import socket try: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.connect((%r, %r)) assert False, "connect() should have failed" except OSError as err: assert err.errno == errno.ECONNREFUSED """ % (postgres_ip, port), network_config=pyspawner.NetworkConfig(), )
# SECURITY test: a module must not be able to reach a service on its own host.
# Resolves this host's IP via socket.gethostname()/gethostbyname(), binds a
# listening TCP socket on that IP at port 19999, then passes a Python snippet to
# `_spawn_and_communicate_or_raise` (with `self._client` and
# `network_config=pyspawner.NetworkConfig()`); presumably the snippet is executed
# in the spawned child -- confirm against the helper.
# The snippet tries to connect to (host_ip, 19999) and only passes when connect()
# raises OSError with errno ECONNREFUSED; a successful connect hits `assert False`.
# NOTE(review): the snippet is whitespace-sensitive Python source inside a raw
# string, interpolated with `%r` -- keep its line breaks and indentation intact.
def test_SECURITY_parent_ip_is_off_limits(self): # The module cannot access a service on its host hostname = socket.gethostname() host_ip = socket.gethostbyname(hostname) port = 19999 # arbitrary with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind((host_ip, port)) s.listen(1) _spawn_and_communicate_or_raise( self._client, r""" import errno import socket try: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.connect((%r, %r)) assert False, "connect() should have failed" except OSError as err: assert err.errno == errno.ECONNREFUSED """ % (host_ip, port), network_config=pyspawner.NetworkConfig(), )