Exemple #1
0
    def render(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        input_table: ArrowTable,
        params: Params,
        tab: Tab,
        fetch_result: Optional[FetchResult],
        output_filename: str,
    ) -> RenderResult:
        """Run the module's `render_thrift()` function and return its result.

        The arguments are converted to Thrift values and packed into a
        `ttypes.RenderRequest`. `basedir` is rewritten relative to the chroot
        root, so the request carries the path as the module sees it. The child
        is run inside `chroot_context.writable_file(basedir / output_filename)`
        and `chroot_context.clear_unowned_edits()` is always called afterwards,
        whether or not the child succeeded.

        Raise ModuleError if the module has a bug. This method itself raises
        ModuleExitedError when the module reports an output filename other
        than `output_filename`, or when its result fails validation (the
        original ValidateError is attached as the cause).
        """
        chroot_dir = chroot_context.chroot.root
        # Translate the host-side path into the path under "/" that is sent to
        # the module. `relative_to()` raises ValueError if `basedir` is not
        # located under `chroot_dir`.
        basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
        request = ttypes.RenderRequest(
            str(basedir_seen_by_module),
            arrow_arrow_table_to_thrift(input_table),
            arrow_params_to_thrift(params),
            arrow_tab_to_thrift(tab),
            (None if fetch_result is None else
             arrow_fetch_result_to_thrift(fetch_result)),
            output_filename,
        )
        try:
            with chroot_context.writable_file(basedir / output_filename):
                result = self._run_in_child(
                    chroot_dir=chroot_dir,
                    network_config=pyspawner.NetworkConfig(
                    ),  # TODO disallow networking
                    compiled_module=compiled_module,
                    timeout=self.render_timeout,
                    result=ttypes.RenderResult(),
                    function="render_thrift",
                    args=[request],
                )
        finally:
            # Runs on success and on failure alike.
            chroot_context.clear_unowned_edits()

        # An empty filename is accepted; any other name than the one we asked
        # for is treated as a module failure.
        if result.table.filename and result.table.filename != output_filename:
            raise ModuleExitedError(compiled_module.module_slug, 0,
                                    "Module wrote to wrong output file")

        try:
            # thrift_render_result_to_arrow() verifies all filenames passed by
            # the module are in the directory the module has access to. It
            # assumes the Arrow file (if there is one) is untrusted, so it can
            # raise ValidateError
            render_result = thrift_render_result_to_arrow(result, basedir)
        except ValidateError as err:
            # Chain explicitly so the traceback shows the validation failure
            # as the direct cause rather than as an incidental context.
            raise ModuleExitedError(
                compiled_module.module_slug,
                0,
                "Module produced invalid data: %s" % str(err),
            ) from err
        return render_result
Exemple #2
0
    def render(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        input_filename: str,
        params: Dict[str, Any],
        tab_name: str,
        fetch_result: Optional[FetchResult],
        tab_outputs: Dict[str, TabOutput],
        uploaded_files: Dict[str, UploadedFile],
        output_filename: str,
    ) -> RenderResult:
        """Run the module's `render_thrift()` function and return its result.

        The arguments are converted to Thrift values and packed into a
        `ttypes.RenderRequest`. `basedir` is rewritten relative to the chroot
        root, so the request carries the path as the module sees it. The child
        is run inside `chroot_context.writable_file(basedir / output_filename)`
        and `chroot_context.clear_unowned_edits()` is always called afterwards.

        `tab_outputs` must be a mapping: it is iterated with `.items()` and
        each value is converted with `arrow_tab_output_to_thrift()`.
        NOTE(review): the key type is presumed to be `str`, mirroring
        `uploaded_files` -- confirm against callers.

        Raise ModuleError if the module has a bug.
        """
        chroot_dir = chroot_context.chroot.root
        # Translate the host-side path into the path under "/" that is sent to
        # the module. `relative_to()` raises ValueError if `basedir` is not
        # located under `chroot_dir`.
        basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
        request = ttypes.RenderRequest(
            basedir=str(basedir_seen_by_module),
            params=pydict_to_thrift_json_object(params),
            tab_name=tab_name,
            tab_outputs={
                k: arrow_tab_output_to_thrift(v)
                for k, v in tab_outputs.items()
            },
            uploaded_files={
                k: arrow_uploaded_file_to_thrift(v)
                for k, v in uploaded_files.items()
            },
            fetch_result=(None if fetch_result is None else
                          arrow_fetch_result_to_thrift(fetch_result)),
            output_filename=output_filename,
            input_filename=input_filename,
        )
        # Only the module slugs listed here get a NetworkConfig; every other
        # module is run with network_config=None.
        if compiled_module.module_slug in {"pythoncode", "ACS2016"}:
            # TODO disallow networking; make network_config always None
            network_config = pyspawner.NetworkConfig()
        else:
            network_config = None
        try:
            with chroot_context.writable_file(basedir / output_filename):
                result = self._run_in_child(
                    chroot_dir=chroot_dir,
                    network_config=network_config,
                    compiled_module=compiled_module,
                    timeout=self.render_timeout,
                    result=ttypes.RenderResult(),
                    function="render_thrift",
                    args=[request],
                )
        finally:
            # Runs on success and on failure alike.
            chroot_context.clear_unowned_edits()

        return thrift_render_result_to_arrow(result)
Exemple #3
0
    def fetch(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        params: Dict[str, Any],
        secrets: Dict[str, Any],
        last_fetch_result: Optional[FetchResult],
        input_parquet_filename: Optional[str],
        output_filename: str,
    ) -> FetchResult:
        """Run the module's `fetch_thrift()` function and return its result.

        A `ttypes.FetchRequest` is assembled from the arguments, with
        `basedir` expressed relative to the chroot root. The child is run
        inside `chroot_context.writable_file(basedir / output_filename)` and
        `chroot_context.clear_unowned_edits()` is always called afterwards.

        Raise ModuleError if the module has a bug. This method itself raises
        ModuleExitedError when the module reports a filename other than
        `output_filename`.
        """
        chroot_root = chroot_context.chroot.root
        module_basedir = Path("/") / basedir.relative_to(chroot_root)

        # Convert each request field up front, in the same order the request
        # constructor lists them.
        thrift_basedir = str(module_basedir)
        thrift_params = pydict_to_thrift_json_object(params)
        thrift_secrets = pydict_to_thrift_json_object(secrets)
        if last_fetch_result is None:
            thrift_last_fetch_result = None
        else:
            thrift_last_fetch_result = arrow_fetch_result_to_thrift(
                last_fetch_result)

        request = ttypes.FetchRequest(
            basedir=thrift_basedir,
            params=thrift_params,
            secrets=thrift_secrets,
            last_fetch_result=thrift_last_fetch_result,
            input_table_parquet_filename=input_parquet_filename,
            output_filename=output_filename,
        )

        output_path = basedir / output_filename
        try:
            with chroot_context.writable_file(output_path):
                result = self._run_in_child(
                    chroot_dir=chroot_root,
                    network_config=pyspawner.NetworkConfig(),
                    compiled_module=compiled_module,
                    timeout=self.fetch_timeout,
                    result=ttypes.FetchResult(),
                    function="fetch_thrift",
                    args=[request],
                )
        finally:
            # Runs on success and on failure alike.
            chroot_context.clear_unowned_edits()

        # An empty filename is accepted; a different non-empty one is not.
        reported_filename = result.filename
        if reported_filename and reported_filename != output_filename:
            raise ModuleExitedError(
                compiled_module.module_slug,
                0,
                "Module wrote to wrong output file",
            )

        # TODO validate result isn't too large. If result is dataframe it makes
        # sense to truncate; but fetch results aren't necessarily data frames.
        # It's up to the module to enforce this logic ... but we need to set a
        # maximum file size.
        return thrift_fetch_result_to_arrow(result, basedir)
Exemple #4
0
    def test_SECURITY_private_network_is_off_limits(self):
        # A spawned module must not be able to reach a service on the private
        # network. The target is Postgres, which the original author states is
        # known to be at this address.
        target = ("10.2.3.4", 5432)  # (ip, port)

        # Program run in the child: connect() must fail, and must fail with
        # ECONNREFUSED specifically.
        child_code = r"""
            import errno
            import socket
            try:
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    s.connect((%r, %r))
                    assert False, "connect() should have failed"
            except OSError as err:
                assert err.errno == errno.ECONNREFUSED
            """ % target

        _spawn_and_communicate_or_raise(
            self._client,
            child_code,
            network_config=pyspawner.NetworkConfig(),
        )
Exemple #5
0
    def test_SECURITY_parent_ip_is_off_limits(self):
        # The module cannot access a service on its host
        hostname = socket.gethostname()
        host_ip = socket.gethostbyname(hostname)

        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
            # Bind to port 0 so the OS picks a free port. A hard-coded port
            # makes bind() fail with EADDRINUSE whenever something else on the
            # machine (or a parallel test run) already holds it.
            listener.bind((host_ip, 0))
            listener.listen(1)
            port = listener.getsockname()[1]

            # Program run in the child: a real listener exists on the host, so
            # a refused connection shows the child was kept away from it.
            child_code = r"""
                import errno
                import socket
                try:
                    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                        s.connect((%r, %r))
                        assert False, "connect() should have failed"
                except OSError as err:
                    assert err.errno == errno.ECONNREFUSED
                """ % (host_ip, port)

            _spawn_and_communicate_or_raise(
                self._client,
                child_code,
                network_config=pyspawner.NetworkConfig(),
            )