def _test_render(
    self,
    render_fn,
    arrow_table_dict=None,
    arrow_table=None,
    params=None,
    tab=Tab("tab-1", "Tab 1"),
    fetch_result=None,
    output_filename=None,
):
    """Call ``module.render_thrift()`` with ``module.render`` patched to `render_fn`.

    Args:
        render_fn: Replacement installed as ``module.render`` for the
            duration of the call (via ``patch.object``).
        arrow_table_dict: Data handed to ``arrow_table_context()`` to build
            the input table when `arrow_table` is None. ``None`` (the
            default) means an empty dict.
        arrow_table: Ready-made input table. When given, `arrow_table_dict`
            is not consulted.
        params: Mapping wrapped in ``Params`` before conversion to Thrift.
            ``None`` (the default) means an empty dict.
        tab: Tab converted with ``arrow_tab_to_thrift()``.
        fetch_result: Converted with ``arrow_fetch_result_to_thrift()``
            unless it is None, in which case None is sent in its place.
        output_filename: Not read by this helper; the output file is always
            a fresh temporary file created under ``self.basedir``.

    Returns:
        Whatever ``thrift_render_result_to_arrow()`` produces for the
        Thrift result, resolved against ``self.basedir``.
    """
    # Build fresh dicts per call: a `{}` default in the signature would be
    # one shared object reused (and possibly mutated) across every call.
    if arrow_table_dict is None:
        arrow_table_dict = {}
    if params is None:
        params = {}

    with ExitStack() as ctx:
        if arrow_table is None:
            arrow_table = ctx.enter_context(
                arrow_table_context(arrow_table_dict, dir=self.basedir)
            )
        ctx.enter_context(patch.object(module, "render", render_fn))
        out_filename = ctx.enter_context(tempfile_context(dir=self.basedir)).name
        thrift_result = module.render_thrift(
            ttypes.RenderRequest(
                str(self.basedir),
                arrow_arrow_table_to_thrift(arrow_table),
                arrow_params_to_thrift(Params(params)),
                arrow_tab_to_thrift(tab),
                (
                    arrow_fetch_result_to_thrift(fetch_result)
                    if fetch_result is not None
                    else None
                ),
                out_filename,
            )
        )
        # Convert while the temporary files are still open in this context.
        return thrift_render_result_to_arrow(thrift_result, self.basedir)
def test_default_render_returns_fetch_result(self):
    # Functionality used by libraryofcongress: when render() is not
    # overridden, the fetched Parquet table and its errors come back as the
    # render result.
    with ExitStack() as stack:
        # Input table differs from the fetched one, so the assertion below
        # can tell which of the two was returned.
        table_in = stack.enter_context(
            arrow_table_context({"A": [1]}, dir=self.basedir)
        )
        fetched_file = stack.enter_context(
            parquet_file({"A": [2]}, dir=self.basedir)
        )
        # Only the basename is sent in the request.
        parquet_filename = Path(fetched_file.name).name
        output_file = stack.enter_context(tempfile_context(dir=self.basedir))
        out_filename = output_file.name

        request = ttypes.RenderRequest(
            str(self.basedir),
            table_in.to_thrift(),
            Params({}).to_thrift(),
            ttypes.Tab("tab-1", "Tab 1"),
            ttypes.FetchResult(
                parquet_filename,
                [RenderError(I18nMessage.TODO_i18n("A warning")).to_thrift()],
            ),
            out_filename,
        )
        thrift_result = module.render_thrift(request)
        result = RenderResult.from_thrift(thrift_result, self.basedir)

        expected = RenderResult(
            arrow_table({"A": [2]}),
            [RenderError(I18nMessage.TODO_i18n("A warning"))],
        )
        assert_render_result_equals(result, expected)
def run_in_sandbox(
    compiled_module: CompiledModule, function: str, args: List[Any]
) -> None:
    """Execute the user's module code, then call `function` with `args`.

    The (Thrift) return value, when it is not None, is serialized with the
    Thrift binary protocol onto ``sys.__stdout__.buffer``.

    Raises:
        NotImplementedError: `function` is not one of ``render_thrift``,
            ``migrate_params_thrift``, ``validate_thrift`` or
            ``fetch_thrift``.
    """
    # TODO sandbox -- will need an OS `clone()` with namespace, cgroups, ....

    # The user's code runs inside a brand-new, programmatically created
    # module, so it starts from a blank namespace -- exactly what we want.
    module_name = f"rawmodule.{compiled_module.module_slug}"
    user_code_module = types.ModuleType(module_name)
    sys.modules[module_name] = user_code_module  # simulate "import"
    user_namespace = user_code_module.__dict__
    exec(compiled_module.code_object, user_namespace)

    # From this point on we are unsafe: `code_object` may be malicious, so
    # every following line has undefined behavior. Luckily, a parent is
    # catching all possible outcomes....

    # Replace pieces of the _default_ module with the user-supplied ones, so
    # that when e.g. the default `render_pandas()` calls `render()`, it
    # reaches the user-code `render()` (if supplied).
    #
    # Good thing we've forked! This totally messes with global variables.
    module = cjwkernel.pandas.module
    default_namespace = module.__dict__
    overridable_names = (
        "fetch",
        "fetch_arrow",
        "fetch_pandas",
        "fetch_thrift",
        "migrate_params",
        "migrate_params_thrift",
        "render",
        "render_arrow",
        "render_arrow_v1",
        "render_pandas",
        "render_thrift",
    )
    default_namespace.update(
        {
            name: user_namespace[name]
            for name in overridable_names
            if name in user_namespace
        }
    )

    # Set ModuleSpec global parameter -- module frameworks use it for params
    default_namespace["ModuleSpec"] = load_spec(compiled_module.module_spec_dict)

    # Only these entry points may be requested by name.
    entry_points = (
        "render_thrift",
        "migrate_params_thrift",
        "validate_thrift",
        "fetch_thrift",
    )
    if function not in entry_points:
        raise NotImplementedError
    result = getattr(module, function)(*args)

    # Write to the original stdout's byte stream.
    transport = thrift.transport.TTransport.TFileObjectTransport(
        sys.__stdout__.buffer
    )
    protocol = thrift.protocol.TBinaryProtocol.TBinaryProtocol(transport)
    if result is None:
        return
    result.write(protocol)
    transport.flush()
def test_default_render_returns_fetch_result(self):
    # Functionality used by libraryofcongress: when render() is not
    # overridden, the fetched Parquet table and its errors come back as the
    # render result.
    with ExitStack() as stack:
        # Input table differs from the fetched one, so the assertion below
        # can tell which of the two was returned.
        table_in = stack.enter_context(
            arrow_table_context({"A": [1]}, dir=self.basedir)
        )
        fetched_file = stack.enter_context(
            parquet_file({"A": [2]}, dir=self.basedir)
        )
        # Only the basename is sent in the request.
        parquet_filename = Path(fetched_file.name).name
        output_file = stack.enter_context(tempfile_context(dir=self.basedir))
        out_filename = output_file.name

        # Request fields are built in the same order as they are passed.
        basedir_str = str(self.basedir)
        thrift_table = arrow_arrow_table_to_thrift(table_in)
        thrift_tab = ttypes.Tab("tab-1", "Tab 1")
        warning_message = ttypes.I18nMessage(
            "TODO_i18n",
            {"text": ttypes.I18nArgument(string_value="A warning")},
        )
        thrift_fetch_result = ttypes.FetchResult(
            parquet_filename,
            [ttypes.RenderError(warning_message, [])],
        )
        request = ttypes.RenderRequest(
            basedir_str,
            thrift_table,
            {},  # params
            thrift_tab,
            thrift_fetch_result,
            out_filename,
        )

        thrift_result = module.render_thrift(request)
        result = thrift_render_result_to_arrow(thrift_result, self.basedir)

        expected = RenderResult(
            arrow_table({"A": [2]}),
            [RenderError(I18nMessage.TODO_i18n("A warning"))],
        )
        assert_render_result_equals(result, expected)