예제 #1
0
    def render(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        input_table: ArrowTable,
        params: Params,
        tab: Tab,
        fetch_result: Optional[FetchResult],
        output_filename: str,
    ) -> RenderResult:
        """Run the module's `render_thrift()` function and return its result.

        `basedir` must live inside the chroot root: the module is handed the
        same directory expressed as an absolute path relative to that root
        (`Path.relative_to()` raises ValueError otherwise). The call runs
        inside `chroot_context.writable_file(basedir / output_filename)`, and
        `chroot_context.clear_unowned_edits()` is always called afterwards,
        whether or not the call succeeded.

        Raise ModuleError if the module has a bug.

        Raise ModuleExitedError if the module reports a table filename other
        than `output_filename`, or if its result fails validation (the
        original ValidateError is chained as the cause).
        """
        chroot_dir = chroot_context.chroot.root
        # Translate the host path into the path the module sees in the chroot.
        basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
        request = ttypes.RenderRequest(
            str(basedir_seen_by_module),
            arrow_arrow_table_to_thrift(input_table),
            arrow_params_to_thrift(params),
            arrow_tab_to_thrift(tab),
            (None if fetch_result is None else
             arrow_fetch_result_to_thrift(fetch_result)),
            output_filename,
        )
        try:
            with chroot_context.writable_file(basedir / output_filename):
                result = self._run_in_child(
                    chroot_dir=chroot_dir,
                    network_config=pyspawner.NetworkConfig(
                    ),  # TODO disallow networking
                    compiled_module=compiled_module,
                    timeout=self.render_timeout,
                    result=ttypes.RenderResult(),
                    function="render_thrift",
                    args=[request],
                )
        finally:
            chroot_context.clear_unowned_edits()

        # An empty filename is accepted; any other name must be the one we
        # asked the module to write.
        if result.table.filename and result.table.filename != output_filename:
            raise ModuleExitedError(compiled_module.module_slug, 0,
                                    "Module wrote to wrong output file")

        try:
            # thrift_render_result_to_arrow() verifies all filenames passed by
            # the module are in the directory the module has access to. It
            # assumes the Arrow file (if there is one) is untrusted, so it can
            # raise ValidateError
            render_result = thrift_render_result_to_arrow(result, basedir)
        except ValidateError as err:
            # Chain explicitly so the traceback shows the validation failure
            # as the direct cause rather than an incidental context.
            raise ModuleExitedError(
                compiled_module.module_slug,
                0,
                "Module produced invalid data: %s" % str(err),
            ) from err
        return render_result
예제 #2
0
    def render(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        input_filename: str,
        params: Dict[str, Any],
        tab_name: str,
        fetch_result: Optional[FetchResult],
        tab_outputs: Dict[str, TabOutput],
        uploaded_files: Dict[str, UploadedFile],
        output_filename: str,
    ) -> RenderResult:
        """Run the module's `render_thrift()` function and return its result.

        `basedir` must live inside the chroot root: the module is handed the
        same directory expressed as an absolute path relative to that root
        (`Path.relative_to()` raises ValueError otherwise). The call runs
        inside `chroot_context.writable_file(basedir / output_filename)`, and
        `chroot_context.clear_unowned_edits()` is always called afterwards,
        whether or not the call succeeded.

        `tab_outputs` and `uploaded_files` are mappings: each value is
        converted to its Thrift form and the keys are passed through
        unchanged. (NOTE(review): `tab_outputs` keys are assumed to be
        strings, like `uploaded_files` -- confirm against callers.)

        Raise ModuleError if the module has a bug.
        """
        chroot_dir = chroot_context.chroot.root
        # Translate the host path into the path the module sees in the chroot.
        basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
        request = ttypes.RenderRequest(
            basedir=str(basedir_seen_by_module),
            params=pydict_to_thrift_json_object(params),
            tab_name=tab_name,
            tab_outputs={
                k: arrow_tab_output_to_thrift(v)
                for k, v in tab_outputs.items()
            },
            uploaded_files={
                k: arrow_uploaded_file_to_thrift(v)
                for k, v in uploaded_files.items()
            },
            fetch_result=(None if fetch_result is None else
                          arrow_fetch_result_to_thrift(fetch_result)),
            output_filename=output_filename,
            input_filename=input_filename,
        )
        # Only these module slugs get a NetworkConfig; every other module
        # runs with network_config=None.
        if compiled_module.module_slug in {"pythoncode", "ACS2016"}:
            # TODO disallow networking; make network_config always None
            network_config = pyspawner.NetworkConfig()
        else:
            network_config = None
        try:
            with chroot_context.writable_file(basedir / output_filename):
                result = self._run_in_child(
                    chroot_dir=chroot_dir,
                    network_config=network_config,
                    compiled_module=compiled_module,
                    timeout=self.render_timeout,
                    result=ttypes.RenderResult(),
                    function="render_thrift",
                    args=[request],
                )
        finally:
            chroot_context.clear_unowned_edits()

        return thrift_render_result_to_arrow(result)
예제 #3
0
def call_render(render: Callable,
                request: ttypes.RenderRequest) -> ttypes.RenderResult:
    """Call `render` with arguments decoded from `request`; write its table.

    The input table and every tab-output table are loaded from Arrow files
    under `request.basedir`. Uploaded-file paths are resolved against the same
    directory. `render` also receives the `settings` object from the
    enclosing scope.

    The table of the returned ArrowRenderResult is written as an Arrow file
    to `request.basedir / request.output_filename`; its errors and JSON are
    returned in Thrift form.

    Raise ValueError if `render` returns anything other than an
    ArrowRenderResult.
    """
    basedir = Path(request.basedir)
    table = load_trusted_arrow_file(basedir / request.input_filename)
    params = thrift_json_object_to_pydict(request.params)

    tab_outputs = {
        k: TabOutput(
            tab_name=v.tab_name,
            table=load_trusted_arrow_file(basedir / v.table_filename),
        )
        for k, v in request.tab_outputs.items()
    }

    uploaded_files = {
        k: UploadedFile(
            name=v.name,
            path=(basedir / v.filename),
            # The timestamp is in microseconds. Build the datetime in UTC and
            # then drop tzinfo: this yields the same naive-UTC value as the
            # deprecated datetime.utcfromtimestamp().
            uploaded_at=datetime.datetime.fromtimestamp(
                v.uploaded_at_timestampus / 1000000.0,
                tz=datetime.timezone.utc,
            ).replace(tzinfo=None),
        )
        for k, v in request.uploaded_files.items()
    }

    if request.fetch_result is None:
        fetch_result = None
    else:
        fetch_result = thrift_fetch_result_to_arrow(request.fetch_result,
                                                    basedir)

    raw_result = render(
        table,
        params,
        settings=settings,
        tab_name=request.tab_name,
        tab_outputs=tab_outputs,
        uploaded_files=uploaded_files,
        fetch_result=fetch_result,
    )

    if not isinstance(raw_result, ArrowRenderResult):
        # Crash. The module author wrote a buggy module.
        raise ValueError(
            "render_arrow_v1() must return a cjwmodule.arrow.types.ArrowRenderResult"
        )

    with pa.ipc.RecordBatchFileWriter(
            basedir / request.output_filename,
            schema=raw_result.table.schema) as writer:
        writer.write_table(raw_result.table)

    return ttypes.RenderResult(
        errors=[arrow_render_error_to_thrift(e) for e in raw_result.errors],
        json=pydict_to_thrift_json_object(raw_result.json),
    )
예제 #4
0
def call_render(module_spec: ModuleSpec, render: Callable,
                request: ttypes.RenderRequest) -> ttypes.RenderResult:
    """Call `render` with arguments decoded from `request`.

    The input table and its columns are loaded from
    `request.basedir / request.input_filename`. Params are built by
    `_prepare_params()` from the request's JSON params and uploaded files.
    `render` is handed the output path
    (`request.basedir / request.output_filename`) along with the `settings`
    object from the enclosing scope.

    `render` may return None (no errors) or a list, which is coerced to a
    list of render errors. The returned Thrift result always has empty JSON.

    Raise ValueError if `render` returns anything else.
    """
    basedir = Path(request.basedir)
    table, columns = load_trusted_arrow_file_with_columns(
        basedir / request.input_filename)

    # Decode the pieces in the same order as they appear in the request
    # handling: raw params first, then uploaded files.
    raw_params = thrift_json_object_to_pydict(request.params)
    arrow_uploaded_files = {
        key: thrift_uploaded_file_to_arrow(thrift_file)
        for key, thrift_file in request.uploaded_files.items()
    }
    params = _prepare_params(
        module_spec,
        raw_params,
        basedir=basedir,
        uploaded_files=arrow_uploaded_files,
    )

    fetch_result = (
        None
        if request.fetch_result is None
        else thrift_fetch_result_to_arrow(request.fetch_result, basedir)
    )
    output_path = basedir / request.output_filename

    raw_result = render(
        table,
        params,
        output_path,
        columns=columns,
        settings=settings,
        tab_name=request.tab_name,
        fetch_result=fetch_result,
    )

    # coerce result
    #
    # TODO omit all this code and rely on Workbench's validation. To do this:
    #
    # 1. Change all modules to return RenderResult
    # 2. Nix this coercion code
    _DEPRECATED_overwrite_to_fix_arrow_table_schema(
        output_path, fallback_schema=table.schema)

    # Only None or a list are understood; anything else is a module bug.
    if raw_result is not None and not isinstance(raw_result, list):
        raise ValueError("Unhandled raw_result")
    errors = [] if raw_result is None else coerce_RenderError_list(raw_result)

    thrift_errors = [arrow_render_error_to_thrift(error) for error in errors]
    return ttypes.RenderResult(
        errors=thrift_errors,
        json={},  # this framework never produces JSON
    )
예제 #5
0
    def render(
        self,
        compiled_module: CompiledModule,
        basedir: Path,
        input_table: ArrowTable,
        params: Params,
        tab: Tab,
        fetch_result: Optional[FetchResult],
        output_filename: str,
    ) -> RenderResult:
        """Run the module's `render_thrift()` function and return its result.

        The call goes through `self._run_in_child()` inside a
        `_chroot_dir_context` that provides `basedir` and extracts
        `basedir / output_filename`.

        Raise ModuleExitedError if the module reports a table filename other
        than `output_filename`. If the result carries a table, it is passed
        through `validate()` together with its metadata before returning.
        """
        # Convert every argument to Thrift, in request-field order.
        thrift_basedir = str(basedir)
        thrift_table = input_table.to_thrift()
        thrift_params = params.to_thrift()
        thrift_tab = tab.to_thrift()
        if fetch_result is None:
            thrift_fetch_result = None
        else:
            thrift_fetch_result = fetch_result.to_thrift()
        request = ttypes.RenderRequest(
            thrift_basedir,
            thrift_table,
            thrift_params,
            thrift_tab,
            thrift_fetch_result,
            output_filename,
        )

        output_path = basedir / output_filename
        with _chroot_dir_context(provide_paths=[basedir],
                                 extract_paths=[output_path]) as chroot:
            # TODO nix networking
            paths_for_child = ([basedir] + DATA_PATHS + PARQUET_PATHS +
                               NETWORKING_PATHS)
            result = self._run_in_child(
                chroot=chroot,
                chroot_paths=paths_for_child,
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
            # An empty filename is accepted; any other name must match.
            reported_filename = result.table.filename
            if reported_filename and reported_filename != output_filename:
                raise ModuleExitedError(0, "Module wrote to wrong output file")

        # RenderResult.from_thrift() verifies all filenames passed by the
        # module are in the directory the module has access to.
        render_result = RenderResult.from_thrift(result, basedir)
        output_table = render_result.table
        if output_table.table is not None:
            validate(output_table.table, output_table.metadata)
        return render_result