def _test_render(
    self,
    render_fn,
    arrow_table_dict=None,
    arrow_table=None,
    params=None,
    tab=Tab("tab-1", "Tab 1"),
    fetch_result=None,
    output_filename=None,
):
    """Call `module.render_thrift()` with `render_fn` patched in as `module.render`.

    Parameters:
        render_fn: replacement for `module.render` during the call.
        arrow_table_dict: data handed to `arrow_table_context()` to build the
            input table; only used when `arrow_table` is None. Defaults to an
            empty dict.
        arrow_table: ready-made input table; when given, `arrow_table_dict`
            is ignored.
        params: dict wrapped in `Params` and sent to the module. Defaults to
            an empty dict.
        tab: tab passed in the request.
        fetch_result: optional fetch result passed in the request.
        output_filename: name of the file the module should write to. When
            None, a temporary file is created in `self.basedir`; it is
            managed by the ExitStack, so presumably it is gone once this
            method returns -- confirm against `tempfile_context()`.

    Returns whatever `thrift_render_result_to_arrow()` builds from the Thrift
    result, resolved against `self.basedir`.
    """
    # Fresh containers per call: never share a mutable default across calls.
    if arrow_table_dict is None:
        arrow_table_dict = {}
    if params is None:
        params = {}

    with ExitStack() as ctx:
        if arrow_table is None:
            arrow_table = ctx.enter_context(
                arrow_table_context(arrow_table_dict, dir=self.basedir)
            )
        ctx.enter_context(patch.object(module, "render", render_fn))
        if output_filename is None:
            # Caller did not choose a file: use a throwaway one.
            output_filename = ctx.enter_context(
                tempfile_context(dir=self.basedir)
            ).name
        thrift_result = module.render_thrift(
            ttypes.RenderRequest(
                str(self.basedir),
                arrow_arrow_table_to_thrift(arrow_table),
                arrow_params_to_thrift(Params(params)),
                arrow_tab_to_thrift(tab),
                (
                    arrow_fetch_result_to_thrift(fetch_result)
                    if fetch_result is not None
                    else None
                ),
                output_filename,
            )
        )
        # Convert while the ExitStack is still open, so any context-managed
        # files still exist.
        return thrift_render_result_to_arrow(thrift_result, self.basedir)
 def _test_fetch(
     self,
     fetch_fn,
     *,
     params=None,
     secrets=None,
     last_fetch_result=None,
     input_table_parquet_path=None,
     output_filename=None,
 ):
     """Call `module.fetch_thrift()` with `fetch_fn` patched in as `module.fetch`.

     Parameters (all but `fetch_fn` are keyword-only):
         fetch_fn: replacement for `module.fetch` during the call.
         params: dict wrapped in `Params`. Defaults to an empty dict.
         secrets: dict wrapped in `RawParams`. Defaults to an empty dict.
         last_fetch_result: optional previous fetch result to send along.
         input_table_parquet_path: optional path object; only its `.name` is
             sent in the request.
         output_filename: file the module should write to. When None, a
             temporary file in `self.basedir` is used.

     Returns whatever `thrift_fetch_result_to_arrow()` builds from the Thrift
     result, resolved against `self.basedir`.
     """
     # Fresh containers per call: never share a mutable default across calls.
     if params is None:
         params = {}
     if secrets is None:
         secrets = {}

     with ExitStack() as ctx:
         ctx.enter_context(patch.object(module, "fetch", fetch_fn))
         if output_filename is None:
             # Make a temporary output filename -- this will make `fetch()`
             # complete, but callers won't be able to see the data it
             # outputs because we'll delete the file too soon.
             output_filename = ctx.enter_context(
                 tempfile_context(dir=self.basedir)
             ).name
         thrift_result = module.fetch_thrift(
             ttypes.FetchRequest(
                 basedir=str(self.basedir),
                 params=arrow_params_to_thrift(Params(params)),
                 secrets=arrow_raw_params_to_thrift(RawParams(secrets)),
                 last_fetch_result=(
                     arrow_fetch_result_to_thrift(last_fetch_result)
                     if last_fetch_result is not None
                     else None
                 ),
                 input_table_parquet_filename=(
                     input_table_parquet_path.name
                     if input_table_parquet_path is not None
                     else None
                 ),
                 output_filename=output_filename,
             )
         )
         return thrift_fetch_result_to_arrow(thrift_result, self.basedir)
Exemple #3
0
    def render(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        input_table: ArrowTable,
        params: Params,
        tab: Tab,
        fetch_result: Optional[FetchResult],
        output_filename: str,
    ) -> RenderResult:
        """Invoke the module's `render_thrift()` in a child and return the result.

        The request is built from the Arrow-side arguments, the call is
        dispatched through `self._run_in_child()` with `self.render_timeout`,
        and the Thrift reply is converted back with
        `thrift_render_result_to_arrow()`.

        Raise ModuleError if the module has a bug. Visible here:
        ModuleExitedError is raised when the module names an output file
        other than `output_filename`, or when its result fails validation.
        """
        chroot_dir = chroot_context.chroot.root
        # `basedir` re-rooted at "/": the path as expressed relative to the
        # chroot directory.
        module_basedir = str(Path("/") / basedir.relative_to(chroot_dir))

        thrift_table = arrow_arrow_table_to_thrift(input_table)
        thrift_params = arrow_params_to_thrift(params)
        thrift_tab = arrow_tab_to_thrift(tab)
        if fetch_result is None:
            thrift_fetch_result = None
        else:
            thrift_fetch_result = arrow_fetch_result_to_thrift(fetch_result)

        request = ttypes.RenderRequest(
            module_basedir,
            thrift_table,
            thrift_params,
            thrift_tab,
            thrift_fetch_result,
            output_filename,
        )

        output_path = basedir / output_filename
        try:
            with chroot_context.writable_file(output_path):
                result = self._run_in_child(
                    chroot_dir=chroot_dir,
                    # TODO disallow networking
                    network_config=pyspawner.NetworkConfig(),
                    compiled_module=compiled_module,
                    timeout=self.render_timeout,
                    result=ttypes.RenderResult(),
                    function="render_thrift",
                    args=[request],
                )
        finally:
            # Runs whether or not the child call succeeded.
            chroot_context.clear_unowned_edits()

        reported_filename = result.table.filename
        if reported_filename and reported_filename != output_filename:
            raise ModuleExitedError(
                compiled_module.module_slug,
                0,
                "Module wrote to wrong output file",
            )

        try:
            # thrift_render_result_to_arrow() verifies all filenames passed by
            # the module are in the directory the module has access to. It
            # assumes the Arrow file (if there is one) is untrusted, so it can
            # raise ValidateError
            return thrift_render_result_to_arrow(result, basedir)
        except ValidateError as err:
            message = "Module produced invalid data: %s" % str(err)
            raise ModuleExitedError(compiled_module.module_slug, 0, message)
 def test_params_to_thrift(self):
     """Each kind of param value maps onto the matching `ParamValue` field."""

     def string_param(value):
         # Shorthand for the Thrift form of a plain string param.
         return ttypes.ParamValue(string_value=value)

     number_format = "{:,.2f}"
     params = types.Params(
         {
             "str": "s",
             "int": 2,
             "float": 1.2,
             "null": None,
             "bool": False,
             "column": types.Column(
                 "A", types.ColumnType.Number(format=number_format)
             ),
             "listofmaps": [{"A": "a", "B": "b"}, {"C": "c", "D": "d"}],
             "tab": "TODO tabs",
         }
     )

     actual = types.arrow_params_to_thrift(params)

     expected_column = ttypes.Column(
         "A",
         ttypes.ColumnType(
             number_type=ttypes.ColumnTypeNumber(format=number_format)
         ),
     )
     expected_list = [
         ttypes.ParamValue(
             map_value={"A": string_param("a"), "B": string_param("b")}
         ),
         ttypes.ParamValue(
             map_value={"C": string_param("c"), "D": string_param("d")}
         ),
     ]
     expected = {
         "str": string_param("s"),
         "int": ttypes.ParamValue(integer_value=2),
         "float": ttypes.ParamValue(float_value=1.2),
         # A None param is an empty ParamValue: no field set.
         "null": ttypes.ParamValue(),
         "bool": ttypes.ParamValue(boolean_value=False),
         "column": ttypes.ParamValue(column_value=expected_column),
         "listofmaps": ttypes.ParamValue(list_value=expected_list),
         "tab": string_param("TODO tabs"),
     }
     self.assertEqual(actual, expected)
Exemple #5
0
    def fetch(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        params: Params,
        secrets: Dict[str, Any],
        last_fetch_result: Optional[FetchResult],
        input_parquet_filename: Optional[str],
        output_filename: str,
    ) -> FetchResult:
        """
        Run the module's `fetch_thrift()` function and return its result.

        The request is built from the Arrow-side arguments (`secrets` is
        wrapped in `RawParams`), the call is dispatched through
        `self._run_in_child()` with `self.fetch_timeout`, and the Thrift
        reply is converted back with `thrift_fetch_result_to_arrow()`.

        Raise ModuleError if the module has a bug. Visible here:
        ModuleExitedError is raised when the module names an output file
        other than `output_filename`.
        """
        chroot_dir = chroot_context.chroot.root
        # `basedir` re-rooted at "/": the path as expressed relative to the
        # chroot directory.
        basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
        request = ttypes.FetchRequest(
            str(basedir_seen_by_module),
            arrow_params_to_thrift(params),
            arrow_raw_params_to_thrift(RawParams(secrets)),
            (
                None
                if last_fetch_result is None
                else arrow_fetch_result_to_thrift(last_fetch_result)
            ),
            input_parquet_filename,
            output_filename,
        )
        try:
            with chroot_context.writable_file(basedir / output_filename):
                result = self._run_in_child(
                    chroot_dir=chroot_dir,
                    network_config=pyspawner.NetworkConfig(),
                    compiled_module=compiled_module,
                    timeout=self.fetch_timeout,
                    result=ttypes.FetchResult(),
                    function="fetch_thrift",
                    args=[request],
                )
        finally:
            # Runs whether or not the child call succeeded.
            chroot_context.clear_unowned_edits()

        if result.filename and result.filename != output_filename:
            # Same argument order as the errors raised by `render()`:
            # (module_slug, exit_code, message).
            raise ModuleExitedError(
                compiled_module.module_slug,
                0,
                "Module wrote to wrong output file",
            )

        # TODO validate result isn't too large. If result is dataframe it makes
        # sense to truncate; but fetch results aren't necessarily data frames.
        # It's up to the module to enforce this logic ... but we need to set a
        # maximum file size.
        return thrift_fetch_result_to_arrow(result, basedir)
Exemple #6
0
 def test_params_filename_to_thrift(self):
     """A path param inside basedir is sent as its bare filename."""
     path = self.basedir / "x.bin"
     actual = types.arrow_params_to_thrift(types.Params({"A": path}))
     expected = {"A": ttypes.ParamValue(filename_value="x.bin")}
     self.assertEqual(actual, expected)