def _test_fetch(
    self,
    fetch_fn,
    *,
    params=None,
    secrets=None,
    last_fetch_result=None,
    input_table_parquet_path=None,
    output_filename=None,
):
    """Call `module.fetch_thrift()` with `module.fetch` patched to `fetch_fn`.

    Builds a `ttypes.FetchRequest` from the keyword arguments, passes it to
    `module.fetch_thrift()` and returns the Thrift result converted by
    `thrift_fetch_result_to_arrow()`.

    Args:
        fetch_fn: Replacement for `module.fetch` while this call runs.
        params: Mapping wrapped in `Params`; omitted or `None` means empty.
        secrets: Mapping wrapped in `RawParams`; omitted or `None` means
            empty.
        last_fetch_result: Previous fetch result, converted with
            `arrow_fetch_result_to_thrift()`; `None` is sent as `None`.
        input_table_parquet_path: Path-like object; only its `.name` is put
            in the request. `None` is sent as `None`.
        output_filename: Filename placed in the request. When `None`, a
            temporary file is created in `self.basedir` for this call only.

    Returns:
        The value of `thrift_fetch_result_to_arrow(result, self.basedir)`.
    """
    # Build fresh dicts on every call: `{}` defaults in the signature would be
    # one shared object across all calls (and therefore across tests).
    params = {} if params is None else params
    secrets = {} if secrets is None else secrets

    with ExitStack() as ctx:
        ctx.enter_context(patch.object(module, "fetch", fetch_fn))

        if output_filename is None:
            # Make a temporary output filename -- this will make `fetch()`
            # complete, but callers won't be able to see the data it
            # outputs because we'll delete the file too soon.
            output_filename = ctx.enter_context(
                tempfile_context(dir=self.basedir)
            ).name

        if last_fetch_result is not None:
            thrift_last_fetch_result = arrow_fetch_result_to_thrift(
                last_fetch_result
            )
        else:
            thrift_last_fetch_result = None

        if input_table_parquet_path is not None:
            input_table_parquet_filename = input_table_parquet_path.name
        else:
            input_table_parquet_filename = None

        thrift_result = module.fetch_thrift(
            ttypes.FetchRequest(
                basedir=str(self.basedir),
                params=arrow_params_to_thrift(Params(params)),
                secrets=arrow_raw_params_to_thrift(RawParams(secrets)),
                last_fetch_result=thrift_last_fetch_result,
                input_table_parquet_filename=input_table_parquet_filename,
                output_filename=output_filename,
            )
        )
        return thrift_fetch_result_to_arrow(thrift_result, self.basedir)
def run_in_sandbox(
    compiled_module: CompiledModule, function: str, args: List[Any]
) -> None:
    """Execute user code, call `function(*args)` and emit the Thrift result.

    The user's code object is executed in a brand-new module registered in
    `sys.modules` as `rawmodule.<module_slug>`. Any of the well-known entry
    points it defines then replace the same-named attributes of
    `cjwkernel.pandas.module`, and `function` is looked up on that module and
    called with `args`. A non-None result is written, using the Thrift binary
    protocol, to `sys.__stdout__.buffer`.

    Raises:
        NotImplementedError: If `function` is not one of `render_thrift`,
            `migrate_params_thrift`, `validate_thrift` or `fetch_thrift`.
    """
    # TODO sandbox -- will need an OS `clone()` with namespace, cgroups, ....

    # Give the user code a blank namespace by running it in a fresh,
    # programmatically-created module.
    user_module_name = f"rawmodule.{compiled_module.module_slug}"
    user_code_module = types.ModuleType(user_module_name)
    sys.modules[user_module_name] = user_code_module  # simulate "import"
    user_globals = user_code_module.__dict__
    exec(compiled_module.code_object, user_globals)

    # From here on we're unsafe: `code_object` may be malicious, so every
    # following line has undefined behavior. Per the original author's note,
    # a parent process is catching all possible outcomes.

    # Overlay the user-supplied functions onto the _default_ module, so that
    # (for example) the default `render_pandas()` ends up calling the user's
    # `render()` when one was supplied.
    #
    # This rewrites module globals; the original author's note says this
    # runs in a forked process.
    default_module = cjwkernel.pandas.module
    default_globals = default_module.__dict__
    overridable_names = (
        "fetch",
        "fetch_arrow",
        "fetch_pandas",
        "fetch_thrift",
        "migrate_params",
        "migrate_params_thrift",
        "render",
        "render_arrow",
        "render_arrow_v1",
        "render_pandas",
        "render_thrift",
    )
    for name in overridable_names:
        if name in user_globals:
            default_globals[name] = user_globals[name]

    # Set ModuleSpec global parameter -- module frameworks use it for params
    default_globals["ModuleSpec"] = load_spec(compiled_module.module_spec_dict)

    # Only these entry points may be invoked; anything else is unsupported.
    callable_entry_points = (
        "render_thrift",
        "migrate_params_thrift",
        "validate_thrift",
        "fetch_thrift",
    )
    if function not in callable_entry_points:
        raise NotImplementedError
    result = getattr(default_module, function)(*args)

    # Use the interpreter's original stdout (`sys.__stdout__`), not whatever
    # `sys.stdout` is currently bound to.
    transport = thrift.transport.TTransport.TFileObjectTransport(
        sys.__stdout__.buffer
    )
    protocol = thrift.protocol.TBinaryProtocol.TBinaryProtocol(transport)
    if result is not None:
        result.write(protocol)
        transport.flush()