def _test_render(
    self,
    render_fn,
    arrow_table_dict={},
    arrow_table=None,
    params={},
    tab=Tab("tab-1", "Tab 1"),
    fetch_result=None,
    output_filename=None,
):
    """Call `module.render_thrift()` with `module.render` patched to `render_fn`.

    The request is built relative to `self.basedir`. If `arrow_table` is
    None, an input table is created from `arrow_table_dict` using
    `arrow_table_context()`. If `output_filename` is None, a temporary
    file in `self.basedir` is used as the output; otherwise the caller's
    `output_filename` is passed to the module as-is.

    Return the Thrift result converted by `thrift_render_result_to_arrow()`.
    """
    with ExitStack() as ctx:
        if arrow_table is None:
            arrow_table = ctx.enter_context(
                arrow_table_context(arrow_table_dict, dir=self.basedir)
            )
        ctx.enter_context(patch.object(module, "render", render_fn))
        if output_filename is None:
            # No filename requested: fall back to a temporary one. Only
            # its name (not the full path) goes into the request, the
            # same way `_test_fetch()` does it.
            output_filename = ctx.enter_context(
                tempfile_context(dir=self.basedir)
            ).name
        thrift_result = module.render_thrift(
            ttypes.RenderRequest(
                str(self.basedir),
                arrow_arrow_table_to_thrift(arrow_table),
                arrow_params_to_thrift(Params(params)),
                arrow_tab_to_thrift(tab),
                (
                    arrow_fetch_result_to_thrift(fetch_result)
                    if fetch_result is not None
                    else None
                ),
                output_filename,
            )
        )
        # Convert before the ExitStack unwinds, while any temporary
        # files entered above still exist.
        return thrift_render_result_to_arrow(thrift_result, self.basedir)
def _test_fetch(
    self,
    fetch_fn,
    *,
    params={},
    secrets={},
    last_fetch_result=None,
    input_table_parquet_path=None,
    output_filename=None,
):
    """Call `module.fetch_thrift()` with `module.fetch` patched to `fetch_fn`.

    All paths in the request are relative to `self.basedir`. Return the
    Thrift result converted by `thrift_fetch_result_to_arrow()`.
    """
    with ExitStack() as stack:
        stack.enter_context(patch.object(module, "fetch", fetch_fn))

        if output_filename is None:
            # Make a temporary output filename -- this will make `fetch()`
            # complete, but callers won't be able to see the data it
            # outputs because we'll delete the file too soon.
            scratch_file = stack.enter_context(tempfile_context(dir=self.basedir))
            output_filename = scratch_file.name

        thrift_params = arrow_params_to_thrift(Params(params))
        thrift_secrets = arrow_raw_params_to_thrift(RawParams(secrets))

        thrift_last_result = None
        if last_fetch_result is not None:
            thrift_last_result = arrow_fetch_result_to_thrift(last_fetch_result)

        # The request carries only the parquet file's name, not its path.
        parquet_filename = None
        if input_table_parquet_path is not None:
            parquet_filename = input_table_parquet_path.name

        request = ttypes.FetchRequest(
            basedir=str(self.basedir),
            params=thrift_params,
            secrets=thrift_secrets,
            last_fetch_result=thrift_last_result,
            input_table_parquet_filename=parquet_filename,
            output_filename=output_filename,
        )
        response = module.fetch_thrift(request)
        return thrift_fetch_result_to_arrow(response, self.basedir)
def render(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    input_table: ArrowTable,
    params: Params,
    tab: Tab,
    fetch_result: Optional[FetchResult],
    output_filename: str,
) -> RenderResult:
    """Invoke the module's `render_thrift()` in a child and return its result.

    Raise ModuleError if the module has a bug.
    """
    chroot_dir = chroot_context.chroot.root
    # The module is given `basedir` re-rooted at "/" relative to the chroot.
    module_basedir = Path("/") / basedir.relative_to(chroot_dir)

    thrift_table = arrow_arrow_table_to_thrift(input_table)
    thrift_params = arrow_params_to_thrift(params)
    thrift_tab = arrow_tab_to_thrift(tab)
    if fetch_result is None:
        thrift_fetch_result = None
    else:
        thrift_fetch_result = arrow_fetch_result_to_thrift(fetch_result)

    request = ttypes.RenderRequest(
        str(module_basedir),
        thrift_table,
        thrift_params,
        thrift_tab,
        thrift_fetch_result,
        output_filename,
    )

    output_path = basedir / output_filename
    try:
        with chroot_context.writable_file(output_path):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=pyspawner.NetworkConfig(),  # TODO disallow networking
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
    finally:
        # Runs whether or not the child call succeeded.
        chroot_context.clear_unowned_edits()

    reported_filename = result.table.filename
    if reported_filename and reported_filename != output_filename:
        raise ModuleExitedError(
            compiled_module.module_slug, 0, "Module wrote to wrong output file"
        )

    try:
        # thrift_render_result_to_arrow() verifies all filenames passed by
        # the module are in the directory the module has access to. It
        # assumes the Arrow file (if there is one) is untrusted, so it can
        # raise ValidateError
        return thrift_render_result_to_arrow(result, basedir)
    except ValidateError as err:
        message = "Module produced invalid data: %s" % str(err)
        raise ModuleExitedError(compiled_module.module_slug, 0, message)
def test_params_to_thrift(self):
    """Check arrow_params_to_thrift() output for each kind of param value."""
    number_format = "{:,.2f}"
    params = types.Params(
        {
            "str": "s",
            "int": 2,
            "float": 1.2,
            "null": None,
            "bool": False,
            "column": types.Column(
                "A", types.ColumnType.Number(format=number_format)
            ),
            "listofmaps": [{"A": "a", "B": "b"}, {"C": "c", "D": "d"}],
            "tab": "TODO tabs",
        }
    )

    actual = types.arrow_params_to_thrift(params)

    def string_param(text):
        # Shorthand for the many string-valued entries expected below.
        return ttypes.ParamValue(string_value=text)

    expected_column = ttypes.Column(
        "A",
        ttypes.ColumnType(
            number_type=ttypes.ColumnTypeNumber(format=number_format)
        ),
    )
    expected_maps = [
        ttypes.ParamValue(
            map_value={"A": string_param("a"), "B": string_param("b")}
        ),
        ttypes.ParamValue(
            map_value={"C": string_param("c"), "D": string_param("d")}
        ),
    ]
    expected = {
        "str": string_param("s"),
        "int": ttypes.ParamValue(integer_value=2),
        "float": ttypes.ParamValue(float_value=1.2),
        "null": ttypes.ParamValue(),
        "bool": ttypes.ParamValue(boolean_value=False),
        "column": ttypes.ParamValue(column_value=expected_column),
        "listofmaps": ttypes.ParamValue(list_value=expected_maps),
        "tab": string_param("TODO tabs"),
    }

    self.assertEqual(actual, expected)
def fetch(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    params: Params,
    secrets: Dict[str, Any],
    last_fetch_result: Optional[FetchResult],
    input_parquet_filename: Optional[str],
    output_filename: str,
) -> FetchResult:
    """Run the module's `fetch_thrift()` function and return its result.

    The request's basedir is `basedir` re-rooted at "/" relative to the
    chroot; `input_parquet_filename` and `output_filename` are passed to
    the module unchanged.

    Raise ModuleError if the module has a bug. In particular, raise
    ModuleExitedError if the module reports an output filename other
    than `output_filename`.
    """
    chroot_dir = chroot_context.chroot.root
    basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
    request = ttypes.FetchRequest(
        str(basedir_seen_by_module),
        arrow_params_to_thrift(params),
        arrow_raw_params_to_thrift(RawParams(secrets)),
        (
            None
            if last_fetch_result is None
            else arrow_fetch_result_to_thrift(last_fetch_result)
        ),
        input_parquet_filename,
        output_filename,
    )
    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=pyspawner.NetworkConfig(),
                compiled_module=compiled_module,
                timeout=self.fetch_timeout,
                result=ttypes.FetchResult(),
                function="fetch_thrift",
                args=[request],
            )
    finally:
        # Runs whether or not the child call succeeded.
        chroot_context.clear_unowned_edits()

    if result.filename and result.filename != output_filename:
        # ModuleExitedError takes (module_slug, exit_code, message), the
        # same argument order `render()` uses.
        raise ModuleExitedError(
            compiled_module.module_slug, 0, "Module wrote to wrong output file"
        )

    # TODO validate result isn't too large. If result is dataframe it makes
    # sense to truncate; but fetch results aren't necessarily data frames.
    # It's up to the module to enforce this logic ... but we need to set a
    # maximum file size.
    return thrift_fetch_result_to_arrow(result, basedir)
def test_params_filename_to_thrift(self):
    """A path under `self.basedir` converts to a `filename_value` of its name."""
    file_in_basedir = self.basedir / "x.bin"
    params = types.Params({"A": file_in_basedir})

    actual = types.arrow_params_to_thrift(params)
    expected = {"A": ttypes.ParamValue(filename_value="x.bin")}

    self.assertEqual(actual, expected)