def test_default_returns_params(self):
    # migrate_params_thrift() is expected to return params equal to its input.

    def build_thrift_params():
        # Build a fresh value on each call so the input and the expectation
        # never share any objects.
        return arrow_raw_params_to_thrift(RawParams({"A": [1], "B": "x"}))

    migrated = module.migrate_params_thrift(build_thrift_params())
    expected = build_thrift_params()
    self.assertEqual(migrated, expected)
def _test_fetch(
    self,
    fetch_fn,
    *,
    params=None,
    secrets=None,
    last_fetch_result=None,
    input_table_parquet_path=None,
    output_filename=None,
):
    """Call `module.fetch_thrift()` with `module.fetch` patched to `fetch_fn`.

    Args:
        fetch_fn: replacement installed as `module.fetch` during the call.
        params: dict wrapped in `Params` for the request; defaults to an
            empty dict.
        secrets: dict wrapped in `RawParams` for the request; defaults to an
            empty dict.
        last_fetch_result: optional previous result; converted with
            `arrow_fetch_result_to_thrift()` when given.
        input_table_parquet_path: optional path; only its `.name` is sent in
            the request.
        output_filename: filename sent in the request. When omitted, a
            temporary file is created in `self.basedir` and removed before
            this helper returns.

    Returns:
        The thrift response converted with `thrift_fetch_result_to_arrow()`.
    """
    # Use None sentinels rather than `{}` defaults: a mutable default is one
    # object shared by every call of this helper.
    if params is None:
        params = {}
    if secrets is None:
        secrets = {}

    with ExitStack() as ctx:
        ctx.enter_context(patch.object(module, "fetch", fetch_fn))

        if output_filename is None:
            # Make a temporary output filename -- this will make `fetch()`
            # complete, but callers won't be able to see the data it
            # outputs because we'll delete the file too soon.
            output_filename = ctx.enter_context(
                tempfile_context(dir=self.basedir)
            ).name

        thrift_result = module.fetch_thrift(
            ttypes.FetchRequest(
                basedir=str(self.basedir),
                params=arrow_params_to_thrift(Params(params)),
                secrets=arrow_raw_params_to_thrift(RawParams(secrets)),
                last_fetch_result=(
                    arrow_fetch_result_to_thrift(last_fetch_result)
                    if last_fetch_result is not None
                    else None
                ),
                input_table_parquet_filename=(
                    input_table_parquet_path.name
                    if input_table_parquet_path is not None
                    else None
                ),
                output_filename=output_filename,
            )
        )
        return thrift_fetch_result_to_arrow(thrift_result, self.basedir)
def test_raw_params_to_thrift(self):
    # The thrift value is expected to equal ttypes.RawParams built from the
    # compact JSON text below.
    arrow_value = types.RawParams({"A": "x", "B": [1, 2]})
    converted = types.arrow_raw_params_to_thrift(arrow_value)
    expected = ttypes.RawParams('{"A":"x","B":[1,2]}')
    self.assertEqual(converted, expected)
def migrate_params(
    self, compiled_module: CompiledModule, params: Dict[str, Any]
) -> Dict[str, Any]:
    """Call a module's migrate_params().

    The params are wrapped in `RawParams`, converted to thrift and passed to
    the module's `migrate_params_thrift` function through
    `self._run_in_child()`. The child is run with `READONLY_CHROOT_DIR` as
    its chroot, `network_config=None`, and `self.migrate_params_timeout` as
    its timeout.

    Args:
        compiled_module: the module whose migration should be run.
        params: the params to migrate.

    Returns:
        The `.params` of the child's response after converting it back with
        `thrift_raw_params_to_arrow()`.
    """
    request = arrow_raw_params_to_thrift(RawParams(params))
    response = self._run_in_child(
        chroot_dir=READONLY_CHROOT_DIR,
        network_config=None,
        compiled_module=compiled_module,
        timeout=self.migrate_params_timeout,
        result=ttypes.RawParams(),
        function="migrate_params_thrift",
        args=[request],
    )
    return thrift_raw_params_to_arrow(response).params
def fetch(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    params: Params,
    secrets: Dict[str, Any],
    last_fetch_result: Optional[FetchResult],
    input_parquet_filename: Optional[str],
    output_filename: str,
) -> FetchResult:
    """
    Invoke the module's `fetch_thrift()` in a child and return what it produced.

    Raise ModuleError if the module has a bug. In particular, raise
    ModuleExitedError when the module reports an output filename other than
    `output_filename`.
    """
    root = chroot_context.chroot.root
    # The request carries `basedir` re-rooted at "/" relative to the chroot.
    module_basedir = Path("/") / basedir.relative_to(root)

    thrift_basedir = str(module_basedir)
    thrift_params = arrow_params_to_thrift(params)
    thrift_secrets = arrow_raw_params_to_thrift(RawParams(secrets))
    if last_fetch_result is None:
        thrift_last_result = None
    else:
        thrift_last_result = arrow_fetch_result_to_thrift(last_fetch_result)

    request = ttypes.FetchRequest(
        thrift_basedir,
        thrift_params,
        thrift_secrets,
        thrift_last_result,
        input_parquet_filename,
        output_filename,
    )

    output_path = basedir / output_filename
    try:
        with chroot_context.writable_file(output_path):
            result = self._run_in_child(
                chroot_dir=root,
                network_config=pyspawner.NetworkConfig(),
                compiled_module=compiled_module,
                timeout=self.fetch_timeout,
                result=ttypes.FetchResult(),
                function="fetch_thrift",
                args=[request],
            )
    finally:
        # Runs whether or not the child call succeeded.
        chroot_context.clear_unowned_edits()

    reported_filename = result.filename
    if reported_filename and reported_filename != output_filename:
        raise ModuleExitedError(0, "Module wrote to wrong output file")

    # TODO validate result isn't too large. If result is dataframe it makes
    # sense to truncate; but fetch results aren't necessarily data frames.
    # It's up to the module to enforce this logic ... but we need to set a
    # maximum file size.
    return thrift_fetch_result_to_arrow(result, basedir)
def migrate_params_thrift(params: ttypes.RawParams):
    """Thrift wrapper around `migrate_params()`.

    Converts the thrift params with `thrift_raw_params_to_arrow()`, passes
    their `.params` to `migrate_params()`, and returns the result wrapped in
    `types.RawParams` and converted back to thrift.
    """
    arrow_params = thrift_raw_params_to_arrow(params)
    migrated: Dict[str, Any] = migrate_params(arrow_params.params)
    wrapped = types.RawParams(migrated)
    return arrow_raw_params_to_thrift(wrapped)
def _test(self, fn, params=None):
    """Run `module.migrate_params_thrift()` with `module.migrate_params` patched.

    Args:
        fn: replacement installed as `module.migrate_params` during the call.
        params: dict wrapped in `RawParams` and converted to thrift as the
            input; defaults to an empty dict.

    Returns:
        The `.params` of the thrift result after converting it back with
        `thrift_raw_params_to_arrow()`.
    """
    # Use a None sentinel rather than a `{}` default: a mutable default is
    # one object shared by every call of this helper.
    if params is None:
        params = {}

    with patch.object(module, "migrate_params", fn):
        thrift_result = module.migrate_params_thrift(
            arrow_raw_params_to_thrift(RawParams(params))
        )
        return thrift_raw_params_to_arrow(thrift_result).params