def _test_render(
    self,
    render_fn,
    arrow_table_dict={},
    arrow_table=None,
    params={},
    tab=Tab("tab-1", "Tab 1"),
    fetch_result=None,
    output_filename=None,
):
    """Call `module.render_thrift()` with `module.render` patched to `render_fn`.

    When `arrow_table` is None, an input table is created from
    `arrow_table_dict` under `self.basedir`. The Thrift result is converted
    back with `thrift_render_result_to_arrow()` and returned.

    NOTE: `output_filename` is accepted but never read; the output path always
    comes from a fresh `tempfile_context()` in `self.basedir`.
    """
    with ExitStack() as stack:
        if arrow_table is None:
            table_cm = arrow_table_context(arrow_table_dict, dir=self.basedir)
            arrow_table = stack.enter_context(table_cm)

        # Swap in the render function under test for the rest of this block.
        stack.enter_context(patch.object(module, "render", render_fn))

        out_file = stack.enter_context(tempfile_context(dir=self.basedir))
        out_filename = out_file.name

        # Build the request arguments in the order the request expects them.
        basedir_str = str(self.basedir)
        thrift_table = arrow_arrow_table_to_thrift(arrow_table)
        thrift_params = arrow_params_to_thrift(Params(params))
        thrift_tab = arrow_tab_to_thrift(tab)
        if fetch_result is None:
            thrift_fetch_result = None
        else:
            thrift_fetch_result = arrow_fetch_result_to_thrift(fetch_result)

        request = ttypes.RenderRequest(
            basedir_str,
            thrift_table,
            thrift_params,
            thrift_tab,
            thrift_fetch_result,
            out_filename,
        )
        thrift_result = module.render_thrift(request)

        # Convert while the temporary files are still open in the stack.
        return thrift_render_result_to_arrow(thrift_result, self.basedir)
def test_default_render_returns_fetch_result(self):
    """The default render should hand back the fetch result's table and errors."""
    # Functionality used by libraryofcongress
    with ExitStack() as stack:
        input_arrow_table = stack.enter_context(
            arrow_table_context({"A": [1]}, dir=self.basedir)
        )

        # Only the file's basename is passed in the fetch result.
        parquet_tempfile = stack.enter_context(
            parquet_file({"A": [2]}, dir=self.basedir)
        )
        parquet_filename = Path(parquet_tempfile.name).name

        out_tempfile = stack.enter_context(tempfile_context(dir=self.basedir))
        out_filename = out_tempfile.name

        fetch_result = ttypes.FetchResult(
            parquet_filename,
            [RenderError(I18nMessage.TODO_i18n("A warning")).to_thrift()],
        )
        request = ttypes.RenderRequest(
            str(self.basedir),
            input_arrow_table.to_thrift(),
            Params({}).to_thrift(),
            ttypes.Tab("tab-1", "Tab 1"),
            fetch_result,
            out_filename,
        )
        thrift_result = module.render_thrift(request)

        actual = RenderResult.from_thrift(thrift_result, self.basedir)
        expected = RenderResult(
            arrow_table({"A": [2]}),
            [RenderError(I18nMessage.TODO_i18n("A warning"))],
        )
        assert_render_result_equals(actual, expected)
def render(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    input_table: ArrowTable,
    params: Params,
    tab: Tab,
    fetch_result: Optional[FetchResult],
    output_filename: str,
) -> RenderResult:
    """Run the module's `render_thrift()` function and return its result.

    The request carries `basedir` re-rooted at "/" relative to the chroot
    root, and `basedir / output_filename` is made writable for the duration
    of the child call. Unowned edits in the chroot are cleared afterwards,
    whether or not the call succeeded.

    Raise ModuleError if the module has a bug. In particular, raise
    ModuleExitedError when the module reports an output filename other than
    `output_filename`, or when converting its result raises ValidateError
    (the ValidateError is chained as the cause).
    """
    chroot_dir = chroot_context.chroot.root
    # basedir as expressed relative to the chroot root, anchored at "/".
    basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)

    thrift_fetch_result = (
        None
        if fetch_result is None
        else arrow_fetch_result_to_thrift(fetch_result)
    )
    request = ttypes.RenderRequest(
        str(basedir_seen_by_module),
        arrow_arrow_table_to_thrift(input_table),
        arrow_params_to_thrift(params),
        arrow_tab_to_thrift(tab),
        thrift_fetch_result,
        output_filename,
    )

    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=pyspawner.NetworkConfig(),  # TODO disallow networking
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
    finally:
        chroot_context.clear_unowned_edits()

    # An empty filename is allowed; any other name must be the one we asked for.
    if result.table.filename and result.table.filename != output_filename:
        raise ModuleExitedError(
            compiled_module.module_slug, 0, "Module wrote to wrong output file"
        )

    try:
        # thrift_render_result_to_arrow() verifies all filenames passed by
        # the module are in the directory the module has access to. It
        # assumes the Arrow file (if there is one) is untrusted, so it can
        # raise ValidateError
        render_result = thrift_render_result_to_arrow(result, basedir)
    except ValidateError as err:
        # Chain explicitly so the validation failure is kept as __cause__.
        raise ModuleExitedError(
            compiled_module.module_slug,
            0,
            "Module produced invalid data: %s" % str(err),
        ) from err
    return render_result
def render(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    input_filename: str,
    params: Dict[str, Any],
    tab_name: str,
    fetch_result: Optional[FetchResult],
    tab_outputs: Dict[str, TabOutput],
    uploaded_files: Dict[str, UploadedFile],
    output_filename: str,
) -> RenderResult:
    """Run the module's `render_thrift()` function and return its result.

    The request carries `basedir` re-rooted at "/" relative to the chroot
    root. `tab_outputs` and `uploaded_files` are mappings; each value is
    converted to its Thrift form and the keys are passed through unchanged.
    `basedir / output_filename` is made writable for the duration of the
    child call, and unowned edits in the chroot are cleared afterwards
    whether or not the call succeeded.

    Only the "pythoncode" and "ACS2016" modules are given a network config;
    every other module runs with `network_config=None`.

    Raise ModuleError if the module has a bug.
    """
    chroot_dir = chroot_context.chroot.root
    # basedir as expressed relative to the chroot root, anchored at "/".
    basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)

    request = ttypes.RenderRequest(
        basedir=str(basedir_seen_by_module),
        params=pydict_to_thrift_json_object(params),
        tab_name=tab_name,
        tab_outputs={
            k: arrow_tab_output_to_thrift(v) for k, v in tab_outputs.items()
        },
        uploaded_files={
            k: arrow_uploaded_file_to_thrift(v) for k, v in uploaded_files.items()
        },
        fetch_result=(
            None
            if fetch_result is None
            else arrow_fetch_result_to_thrift(fetch_result)
        ),
        output_filename=output_filename,
        input_filename=input_filename,
    )

    if compiled_module.module_slug in {"pythoncode", "ACS2016"}:
        # TODO disallow networking; make network_config always None
        network_config = pyspawner.NetworkConfig()
    else:
        network_config = None

    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=network_config,
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
    finally:
        chroot_context.clear_unowned_edits()

    return thrift_render_result_to_arrow(result)
def call_render(
    self,
    table: pa.Table,
    params: Dict[str, Any],
    tab_name: str = "Tab 1",
    tab_outputs: Dict[str, TabOutput] = {},
    fetch_result: Optional[FetchResult] = None,
    uploaded_files: Dict[str, UploadedFile] = {},
) -> RenderOutcome:
    """Conveniently call the module's `render_thrift()`.

    The calling convention is designed for ease of testing: `table` is
    written to a temporary input file in `self.basedir`, a fresh output file
    is created there too, and the call runs with `self.basedir` as the
    current working directory. The previous working directory is restored
    afterwards, even if the call raises.

    Return a RenderOutcome holding the converted result and the output path.
    """
    # tempfile will be deleted in __exit__().
    out_fd, out_name = mkstemp(prefix="out-", suffix=".arrow", dir=self.basedir)
    os.close(out_fd)  # only the path is needed; the module writes the file
    output_path = Path(out_name)

    with arrow_table_context(table, dir=self.basedir) as (input_path, _):
        previous_cwd = os.getcwd()
        os.chdir(self.basedir)
        try:
            # Convert arguments in the same order the request lists them.
            thrift_params = pydict_to_thrift_json_object(params)
            thrift_tab_outputs = {
                name: arrow_tab_output_to_thrift(output)
                for name, output in tab_outputs.items()
            }
            if fetch_result is not None:
                thrift_fetch_result = arrow_fetch_result_to_thrift(fetch_result)
            else:
                thrift_fetch_result = None
            thrift_uploaded_files = {
                name: arrow_uploaded_file_to_thrift(uploaded)
                for name, uploaded in uploaded_files.items()
            }

            request = ttypes.RenderRequest(
                basedir=self.basedir,
                input_filename=input_path.name,
                params=thrift_params,
                tab_name=tab_name,
                tab_outputs=thrift_tab_outputs,
                fetch_result=thrift_fetch_result,
                uploaded_files=thrift_uploaded_files,
                output_filename=output_path.name,
            )
            thrift_result = cjwkernel.pandas.module.render_thrift(request)
        finally:
            os.chdir(previous_cwd)

        arrow_result = thrift_render_result_to_arrow(thrift_result)
        return RenderOutcome(arrow_result, output_path)
def test_default_render_returns_fetch_result(self):
    """The default render should hand back the fetch result's table and errors."""
    # Functionality used by libraryofcongress
    with ExitStack() as stack:
        input_arrow_table = stack.enter_context(
            arrow_table_context({"A": [1]}, dir=self.basedir)
        )

        # Only the file's basename is passed in the fetch result.
        parquet_tempfile = stack.enter_context(
            parquet_file({"A": [2]}, dir=self.basedir)
        )
        parquet_filename = Path(parquet_tempfile.name).name

        out_tempfile = stack.enter_context(tempfile_context(dir=self.basedir))
        out_filename = out_tempfile.name

        basedir_str = str(self.basedir)
        thrift_input_table = arrow_arrow_table_to_thrift(input_arrow_table)

        # The warning, spelled out as raw Thrift structures.
        warning_message = ttypes.I18nMessage(
            "TODO_i18n",
            {"text": ttypes.I18nArgument(string_value="A warning")},
        )
        warning = ttypes.RenderError(warning_message, [])
        fetch_result = ttypes.FetchResult(parquet_filename, [warning])

        request = ttypes.RenderRequest(
            basedir_str,
            thrift_input_table,
            {},  # params
            ttypes.Tab("tab-1", "Tab 1"),
            fetch_result,
            out_filename,
        )
        thrift_result = module.render_thrift(request)

        actual = thrift_render_result_to_arrow(thrift_result, self.basedir)
        expected = RenderResult(
            arrow_table({"A": [2]}),
            [RenderError(I18nMessage.TODO_i18n("A warning"))],
        )
        assert_render_result_equals(actual, expected)
def render(
    self,
    compiled_module: CompiledModule,
    basedir: Path,
    input_table: ArrowTable,
    params: Params,
    tab: Tab,
    fetch_result: Optional[FetchResult],
    output_filename: str,
) -> RenderResult:
    """Run the module's `render_thrift()` in a child and return its result.

    `basedir` is provided to the chroot and `basedir / output_filename` is
    listed for extraction from it. Raise ModuleExitedError if the module
    reports an output filename other than `output_filename`. When the result
    contains a table, it is checked with `validate()` before being returned.
    """
    thrift_fetch_result = None
    if fetch_result is not None:
        thrift_fetch_result = fetch_result.to_thrift()

    request = ttypes.RenderRequest(
        str(basedir),
        input_table.to_thrift(),
        params.to_thrift(),
        tab.to_thrift(),
        thrift_fetch_result,
        output_filename,
    )

    chroot_context = _chroot_dir_context(
        provide_paths=[basedir],
        extract_paths=[basedir / output_filename],
    )
    with chroot_context as chroot:
        # TODO nix networking
        visible_paths = [basedir] + DATA_PATHS + PARQUET_PATHS + NETWORKING_PATHS
        result = self._run_in_child(
            chroot=chroot,
            chroot_paths=visible_paths,
            compiled_module=compiled_module,
            timeout=self.render_timeout,
            result=ttypes.RenderResult(),
            function="render_thrift",
            args=[request],
        )

    # An empty filename is allowed; any other name must be the one we asked for.
    if result.table.filename and result.table.filename != output_filename:
        raise ModuleExitedError(0, "Module wrote to wrong output file")

    # RenderResult.from_thrift() verifies all filenames passed by the
    # module are in the directory the module has access to.
    render_result = RenderResult.from_thrift(result, basedir)
    output_table = render_result.table
    if output_table.table is not None:
        validate(output_table.table, output_table.metadata)
    return render_result