def render(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    input_table: ArrowTable,
    params: Params,
    tab: Tab,
    fetch_result: Optional[FetchResult],
    output_filename: str,
) -> RenderResult:
    """Run the module's `render_thrift()` function and return its result.

    The request handed to the module carries `basedir` re-expressed relative
    to the chroot root (prefixed with "/"), so `basedir` must be located
    under `chroot_context.chroot.root`.

    Raise ModuleError if the module has a bug. In particular, this method
    itself raises ModuleExitedError when the module reports an output
    filename other than `output_filename`, or when converting the module's
    result raises ValidateError (the ValidateError is attached as the cause).
    """
    chroot_dir = chroot_context.chroot.root
    # Path of `basedir` as addressed from inside the chroot.
    basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
    request = ttypes.RenderRequest(
        str(basedir_seen_by_module),
        arrow_arrow_table_to_thrift(input_table),
        arrow_params_to_thrift(params),
        arrow_tab_to_thrift(tab),
        (
            None
            if fetch_result is None
            else arrow_fetch_result_to_thrift(fetch_result)
        ),
        output_filename,
    )
    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=pyspawner.NetworkConfig(),  # TODO disallow networking
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
    finally:
        # Runs whether or not the child succeeded.
        chroot_context.clear_unowned_edits()

    if result.table.filename and result.table.filename != output_filename:
        raise ModuleExitedError(
            compiled_module.module_slug, 0, "Module wrote to wrong output file"
        )

    try:
        # thrift_render_result_to_arrow() verifies all filenames passed by
        # the module are in the directory the module has access to. It
        # assumes the Arrow file (if there is one) is untrusted, so it can
        # raise ValidateError
        render_result = thrift_render_result_to_arrow(result, basedir)
    except ValidateError as err:
        # Chain explicitly so the validation failure stays attached as the
        # cause of the module error.
        raise ModuleExitedError(
            compiled_module.module_slug,
            0,
            "Module produced invalid data: %s" % str(err),
        ) from err
    return render_result
def render(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    input_filename: str,
    params: Dict[str, Any],
    tab_name: str,
    fetch_result: Optional[FetchResult],
    tab_outputs: Dict[str, TabOutput],
    uploaded_files: Dict[str, UploadedFile],
    output_filename: str,
) -> RenderResult:
    """Run the module's `render_thrift()` function and return its result.

    The request handed to the module carries `basedir` re-expressed relative
    to the chroot root (prefixed with "/"), so `basedir` must be located
    under `chroot_context.chroot.root`. `tab_outputs` and `uploaded_files`
    are mappings: each value is converted to its Thrift form and sent under
    the same key.

    Raise ModuleError if the module has a bug.
    """
    chroot_dir = chroot_context.chroot.root
    # Path of `basedir` as addressed from inside the chroot.
    basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
    request = ttypes.RenderRequest(
        basedir=str(basedir_seen_by_module),
        params=pydict_to_thrift_json_object(params),
        tab_name=tab_name,
        tab_outputs={
            k: arrow_tab_output_to_thrift(v) for k, v in tab_outputs.items()
        },
        uploaded_files={
            k: arrow_uploaded_file_to_thrift(v) for k, v in uploaded_files.items()
        },
        fetch_result=(
            None
            if fetch_result is None
            else arrow_fetch_result_to_thrift(fetch_result)
        ),
        output_filename=output_filename,
        input_filename=input_filename,
    )

    if compiled_module.module_slug in {"pythoncode", "ACS2016"}:
        # TODO disallow networking; make network_config always None
        network_config = pyspawner.NetworkConfig()
    else:
        network_config = None

    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=network_config,
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
    finally:
        # Runs whether or not the child succeeded.
        chroot_context.clear_unowned_edits()

    return thrift_render_result_to_arrow(result)
def call_render(render: Callable, request: ttypes.RenderRequest) -> ttypes.RenderResult:
    """Decode a Thrift render request, call `render`, and encode its result.

    Every filename in `request` is resolved against `request.basedir`. The
    input table and each tab-output table are loaded with
    `load_trusted_arrow_file()`. `render` is invoked as
    `render(table, params, settings=..., tab_name=..., tab_outputs=...,
    uploaded_files=..., fetch_result=...)`; `settings` is not a parameter of
    this function and is resolved from the enclosing scope.

    The table of the returned ArrowRenderResult is written to
    `basedir / request.output_filename`; its errors and JSON are returned in
    a `ttypes.RenderResult`.

    Raise ValueError if `render` returns anything other than an
    ArrowRenderResult.
    """
    basedir = Path(request.basedir)
    table = load_trusted_arrow_file(basedir / request.input_filename)
    params = thrift_json_object_to_pydict(request.params)
    tab_outputs = {
        k: TabOutput(
            tab_name=v.tab_name,
            table=load_trusted_arrow_file(basedir / v.table_filename),
        )
        for k, v in request.tab_outputs.items()
    }

    # Build the naive-UTC upload time with exact microsecond arithmetic.
    # This yields the same value utcfromtimestamp() gave, without going
    # through a float and without the API deprecated in Python 3.12.
    epoch = datetime.datetime(1970, 1, 1)
    uploaded_files = {
        k: UploadedFile(
            name=v.name,
            path=(basedir / v.filename),
            uploaded_at=(
                epoch + datetime.timedelta(microseconds=v.uploaded_at_timestampus)
            ),
        )
        for k, v in request.uploaded_files.items()
    }

    if request.fetch_result is None:
        fetch_result = None
    else:
        fetch_result = thrift_fetch_result_to_arrow(request.fetch_result, basedir)

    raw_result = render(
        table,
        params,
        settings=settings,
        tab_name=request.tab_name,
        tab_outputs=tab_outputs,
        uploaded_files=uploaded_files,
        fetch_result=fetch_result,
    )

    if not isinstance(raw_result, ArrowRenderResult):
        # Crash. The module author wrote a buggy module.
        raise ValueError(
            "render_arrow_v1() must return a cjwmodule.arrow.types.ArrowRenderResult"
        )

    with pa.ipc.RecordBatchFileWriter(
        basedir / request.output_filename, schema=raw_result.table.schema
    ) as writer:
        writer.write_table(raw_result.table)

    return ttypes.RenderResult(
        errors=[arrow_render_error_to_thrift(e) for e in raw_result.errors],
        json=pydict_to_thrift_json_object(raw_result.json),
    )
def call_render(module_spec: ModuleSpec, render: Callable, request: ttypes.RenderRequest) -> ttypes.RenderResult:
    """Decode a Thrift render request, call `render`, and encode its errors.

    Filenames in `request` are resolved against `request.basedir`. `render`
    receives the input table, the prepared params and the output path
    positionally, plus `columns`, `settings`, `tab_name` and `fetch_result`
    as keywords. `settings` is not a parameter of this function; it is
    resolved from the enclosing scope.

    `render` must return None or a list; anything else raises ValueError
    (after the output-file schema fix-up has already run).
    """
    root = Path(request.basedir)
    table, columns = load_trusted_arrow_file_with_columns(
        root / request.input_filename
    )

    raw_params = thrift_json_object_to_pydict(request.params)
    uploads = {}
    for key, thrift_file in request.uploaded_files.items():
        uploads[key] = thrift_uploaded_file_to_arrow(thrift_file)
    params = _prepare_params(
        module_spec, raw_params, basedir=root, uploaded_files=uploads
    )

    fetch_result = (
        thrift_fetch_result_to_arrow(request.fetch_result, root)
        if request.fetch_result is not None
        else None
    )

    output_path = root / request.output_filename
    raw_result = render(
        table,
        params,
        output_path,
        columns=columns,
        settings=settings,
        tab_name=request.tab_name,
        fetch_result=fetch_result,
    )

    # coerce result
    #
    # TODO omit all this code and rely on Workbench's validation. To do this:
    #
    # 1. Change all modules to return RenderResult
    # 2. Nix this coercion code
    _DEPRECATED_overwrite_to_fix_arrow_table_schema(
        output_path, fallback_schema=table.schema
    )

    # Only None (no errors) and list (errors to coerce) are understood.
    if raw_result is not None and not isinstance(raw_result, list):
        raise ValueError("Unhandled raw_result")
    errors = [] if raw_result is None else coerce_RenderError_list(raw_result)

    return ttypes.RenderResult(
        errors=[arrow_render_error_to_thrift(err) for err in errors],
        json={},  # this framework never produces JSON
    )
def render(
    self,
    compiled_module: CompiledModule,
    basedir: Path,
    input_table: ArrowTable,
    params: Params,
    tab: Tab,
    fetch_result: Optional[FetchResult],
    output_filename: str,
) -> RenderResult:
    """Run the module's `render_thrift()` function in a child and return its result.

    The arguments are converted to Thrift and sent as one RenderRequest.
    The child runs with `basedir` provided to the chroot and
    `basedir / output_filename` listed as a path to extract.

    Raise ModuleExitedError if the module reports an output filename other
    than `output_filename`. If the result carries a table, it is passed
    through `validate()` together with its metadata before being returned.
    """
    # Convert in the same left-to-right order the request fields are declared.
    thrift_basedir = str(basedir)
    thrift_table = input_table.to_thrift()
    thrift_params = params.to_thrift()
    thrift_tab = tab.to_thrift()
    thrift_fetch_result = (
        fetch_result.to_thrift() if fetch_result is not None else None
    )
    request = ttypes.RenderRequest(
        thrift_basedir,
        thrift_table,
        thrift_params,
        thrift_tab,
        thrift_fetch_result,
        output_filename,
    )

    output_path = basedir / output_filename
    with _chroot_dir_context(
        provide_paths=[basedir], extract_paths=[output_path]
    ) as chroot:
        result = self._run_in_child(
            chroot=chroot,
            chroot_paths=(
                [basedir] + DATA_PATHS + PARQUET_PATHS + NETWORKING_PATHS
            ),  # TODO nix networking
            compiled_module=compiled_module,
            timeout=self.render_timeout,
            result=ttypes.RenderResult(),
            function="render_thrift",
            args=[request],
        )

    reported_filename = result.table.filename
    if reported_filename and reported_filename != output_filename:
        raise ModuleExitedError(0, "Module wrote to wrong output file")

    # RenderResult.from_thrift() verifies all filenames passed by the
    # module are in the directory the module has access to.
    render_result = RenderResult.from_thrift(result, basedir)
    output_table = render_result.table
    if output_table.table is not None:
        validate(output_table.table, output_table.metadata)
    return render_result