def _test_fetch(
    self,
    fetch_fn,
    *,
    params=None,
    secrets=None,
    last_fetch_result=None,
    input_table_parquet_path=None,
    output_filename=None,
):
    """Call `module.fetch_thrift()` with `module.fetch` patched to `fetch_fn`.

    Builds a `ttypes.FetchRequest` rooted at `self.basedir` from the given
    arguments, runs it, and returns the Thrift result converted through
    `thrift_fetch_result_to_arrow()`.

    `params` and `secrets` default to a fresh empty dict on each call.
    `output_filename` defaults to the name of a temporary file created in
    `self.basedir` for the duration of the call.
    """
    # Build fresh dicts per call instead of sharing one mutable default
    # object between every invocation of this helper.
    if params is None:
        params = {}
    if secrets is None:
        secrets = {}

    with ExitStack() as ctx:
        ctx.enter_context(patch.object(module, "fetch", fetch_fn))
        if output_filename is None:
            # Make a temporary output filename -- this will make `fetch()`
            # complete, but callers won't be able to see the data it
            # outputs because we'll delete the file too soon.
            output_filename = ctx.enter_context(
                tempfile_context(dir=self.basedir)
            ).name
        thrift_result = module.fetch_thrift(
            ttypes.FetchRequest(
                basedir=str(self.basedir),
                params=arrow_params_to_thrift(Params(params)),
                secrets=arrow_raw_params_to_thrift(RawParams(secrets)),
                last_fetch_result=(
                    arrow_fetch_result_to_thrift(last_fetch_result)
                    if last_fetch_result is not None
                    else None
                ),
                input_table_parquet_filename=(
                    input_table_parquet_path.name
                    if input_table_parquet_path is not None
                    else None
                ),
                output_filename=output_filename,
            )
        )
        # Convert while the patch and any temporary output file are still
        # in place.
        return thrift_fetch_result_to_arrow(thrift_result, self.basedir)
def fetch(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    params: Dict[str, Any],
    secrets: Dict[str, Any],
    last_fetch_result: Optional[FetchResult],
    input_parquet_filename: Optional[str],
    output_filename: str,
) -> FetchResult:
    """Invoke the module's `fetch_thrift()` in a child and return its result.

    The request's `basedir` is `basedir` re-expressed relative to the chroot
    root, as an absolute path. `basedir / output_filename` is made writable
    for the duration of the child call, and
    `chroot_context.clear_unowned_edits()` always runs afterwards.

    Raise ModuleError if the module has a bug. In particular, raise
    ModuleExitedError when the result names a file other than
    `output_filename`.
    """
    chroot_root = chroot_context.chroot.root
    # Path handed to the module: `basedir` relative to the chroot root,
    # re-anchored at "/".
    module_basedir = Path("/") / basedir.relative_to(chroot_root)

    basedir_arg = str(module_basedir)
    thrift_params = pydict_to_thrift_json_object(params)
    thrift_secrets = pydict_to_thrift_json_object(secrets)
    if last_fetch_result is None:
        thrift_last_result = None
    else:
        thrift_last_result = arrow_fetch_result_to_thrift(last_fetch_result)

    request = ttypes.FetchRequest(
        basedir=basedir_arg,
        params=thrift_params,
        secrets=thrift_secrets,
        last_fetch_result=thrift_last_result,
        input_table_parquet_filename=input_parquet_filename,
        output_filename=output_filename,
    )

    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_root,
                network_config=pyspawner.NetworkConfig(),
                compiled_module=compiled_module,
                timeout=self.fetch_timeout,
                result=ttypes.FetchResult(),
                function="fetch_thrift",
                args=[request],
            )
    finally:
        # Runs whether or not the child call succeeded.
        chroot_context.clear_unowned_edits()

    written_filename = result.filename
    if written_filename and written_filename != output_filename:
        raise ModuleExitedError(
            compiled_module.module_slug, 0, "Module wrote to wrong output file"
        )

    # TODO validate result isn't too large. If result is dataframe it makes
    # sense to truncate; but fetch results aren't necessarily data frames.
    # It's up to the module to enforce this logic ... but we need to set a
    # maximum file size.
    return thrift_fetch_result_to_arrow(result, basedir)
def fetch(
    self,
    compiled_module: CompiledModule,
    basedir: Path,
    params: Params,
    secrets: Dict[str, Any],
    last_fetch_result: Optional[FetchResult],
    input_parquet_filename: str,
    output_filename: str,
) -> FetchResult:
    """Invoke the module's `fetch_thrift()` in a child and return its result.

    A `ttypes.FetchRequest` is built from the arguments and passed to
    `self._run_in_child()` inside a `_chroot_dir_context` that is given
    `basedir` as a provided path and `basedir / output_filename` as an
    extracted path.

    Raise ModuleExitedError when the result names a file other than
    `output_filename`.
    """
    basedir_arg = str(basedir)
    thrift_params = params.to_thrift()
    thrift_secrets = RawParams(secrets).to_thrift()
    if last_fetch_result is None:
        thrift_last_result = None
    else:
        thrift_last_result = last_fetch_result.to_thrift()

    # FetchRequest fields are passed positionally, in this order.
    request = ttypes.FetchRequest(
        basedir_arg,
        thrift_params,
        thrift_secrets,
        thrift_last_result,
        input_parquet_filename,
        output_filename,
    )

    chroot_context = _chroot_dir_context(
        provide_paths=[basedir],
        extract_paths=[basedir / output_filename],
    )
    with chroot_context as chroot:
        visible_paths = [basedir] + DATA_PATHS + PARQUET_PATHS + NETWORKING_PATHS
        result = self._run_in_child(
            chroot=chroot,
            chroot_paths=visible_paths,
            compiled_module=compiled_module,
            timeout=self.fetch_timeout,
            result=ttypes.FetchResult(),
            function="fetch_thrift",
            args=[request],
        )

    written_filename = result.filename
    if written_filename and written_filename != output_filename:
        raise ModuleExitedError(0, "Module wrote to wrong output file")

    # TODO validate result isn't too large. If result is dataframe it makes
    # sense to truncate; but fetch results aren't necessarily data frames.
    # It's up to the module to enforce this logic ... but we need to set a
    # maximum file size.
    return FetchResult.from_thrift(result, basedir)