def test_delete_step(self):
        # Cache a result (a table plus one error) for the step, so that a
        # cached_render_result exists and crr_parquet_key() can give us a key.
        table = arrow_table({"A": [1]})
        errors = [RenderError(I18nMessage("X", {}, None), [])]
        cache_render_result(
            self.workflow, self.step, 1, RenderResult(table, errors, {})
        )

        # Look the key up *before* deleting the step.
        key = crr_parquet_key(self.step.cached_render_result)
        self.step.delete()

        # Nothing may remain under that key once the step is deleted.
        still_stored = s3.exists(BUCKET, key)
        self.assertFalse(still_stored)
    def test_clean_tabs_preserve_ordering(self):
        tab2, tab3 = Tab("tab-2", "Tab 2"), Tab("tab-3", "Tab 3")
        tab2_result = RenderResult(arrow_table({"B": [1]}))
        tab3_result = RenderResult(arrow_table({"C": [1]}))

        # The insertion order of this dict is the desired tab order: tab-3
        # first, then tab-2. (Python 3.7 spec: dict is ordered in insertion
        # order. CPython 3.6 and PyPy 7 do this, too.)
        desired_order = {tab3: tab3_result, tab2: tab2_result}
        context = self._render_context(tab_results=desired_order)

        # Pass the slugs in the opposite order; renderprep should reorder
        # them to match the context.
        cleaned = clean_value(ParamDType.Multitab(), ["tab-2", "tab-3"], context)
        cleaned_slugs = [entry.tab.slug for entry in cleaned]
        self.assertEqual(cleaned_slugs, ["tab-3", "tab-2"])
Beispiel #3
0
    def test_render_with_parquet_fetch_result(self):
        # The stub render() simply hands back whatever fetch_result it was
        # given, so the assertion checks what arrives in that argument.
        def render(*args, fetch_result):
            return fetch_result

        fetched_data = {"A": ["fetched"]}
        with parquet_file(fetched_data, dir=self.basedir) as parquet_path:
            result = self._test_render(
                render, fetch_result=FetchResult(parquet_path)
            )
            expected = RenderResult(arrow_table({"A": ["fetched"]}))
            assert_render_result_equals(result, expected)
Beispiel #4
0
    def test_execute_cache_hit(self, fake_module):
        workflow = Workflow.objects.create()
        tab = workflow.tabs.create(position=0)
        delta = InitWorkflowCommand.create(workflow)

        # Create two steps in order; each one gets a cached result for the
        # same delta right after it is created.
        cached_tables = [("step-1", {"A": [1]}), ("step-2", {"B": [2]})]
        for order, (slug, columns) in enumerate(cached_tables):
            wf_module = tab.wf_modules.create(
                order=order, slug=slug, last_relevant_delta_id=delta.id
            )
            cache_render_result(
                workflow,
                wf_module,
                delta.id,
                RenderResult(arrow_table(columns)),
            )

        self._execute(workflow)

        # Both steps were already cached, so the module must not be invoked.
        fake_module.assert_not_called()
Beispiel #5
0
    def test_empty_json(self):
        # Cache a result that carries a table but no `json` argument.
        table = arrow_table({"A": ["a", "b"]})
        cache_render_result(self.workflow, self.step, 1, RenderResult(table))

        response = self._request()

        self.assertEqual(response.status_code, status.NOT_FOUND)
        body = json.loads(response.content)
        self.assertEqual(body, {"error": "render result has no JSON"})
Beispiel #6
0
    def test_render_with_non_parquet_fetch_result(self):
        # The stub render() reads the fetched file as text and returns it as
        # the only cell of column "A".
        def render(table, params, *, fetch_result):
            return pd.DataFrame({"A": [fetch_result.path.read_text()]})

        with tempfile_context(dir=self.basedir) as fetched_path:
            fetched_path.write_bytes(b"abcd")
            result = self._test_render(
                render, fetch_result=FetchResult(fetched_path)
            )
            expected = RenderResult(arrow_table({"A": ["abcd"]}))
            assert_render_result_equals(result, expected)
 def test_invalid_parquet_is_corrupt_cache_error(self):
     cache_render_result(
         self.workflow,
         self.wf_module,
         self.delta.id,
         RenderResult(arrow_table({"A": [1]})),
     )
     crr = self.wf_module.cached_render_result

     # Overwrite the stored file with bytes that are not Parquet.
     key = crr_parquet_key(crr)
     minio.put_bytes(BUCKET, key, b"NOT PARQUET")

     # Loading the cached result must now raise CorruptCacheError.
     with tempfile_context() as arrow_path, self.assertRaises(CorruptCacheError):
         load_cached_render_result(crr, arrow_path)
Beispiel #8
0
def _wrap_render_errors(render_call):
    try:
        return render_call()
    except ModuleError as err:
        return RenderResult.from_deprecated_error(
            "Something unexpected happened. We have been notified and are "
            "working to fix it. If this persists, contact us. Error code: "
            + format_for_user_debugging(err)
        )
Beispiel #9
0
    def test_delete_wfmodule(self):
        # Cache a result (a table plus one error) so that a
        # cached_render_result exists and crr_parquet_key() can give us a key.
        table = arrow_table({"A": [1]})
        errors = [RenderError(I18nMessage("X", []), [])]
        cache_render_result(
            self.workflow,
            self.wf_module,
            self.delta.id,
            RenderResult(table, errors, {}),
        )

        # Look the key up *before* deleting the module.
        key = crr_parquet_key(self.wf_module.cached_render_result)
        self.wf_module.delete()

        # Nothing may remain under that key once the module is deleted.
        still_stored = minio.exists(BUCKET, key)
        self.assertFalse(still_stored)
    def test_email_delta_ignore_corrupt_cache_error(self, email_delta, read_cache):
        # Configure read_cache so its side effect is CorruptCacheError.
        read_cache.side_effect = rendercache.CorruptCacheError
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        stale_delta_id = workflow.last_delta_id - 1
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=stale_delta_id,
            notifications=True,
        )

        # The cache has to be populated for real: the code under test only
        # tries to open the render result when the database says one exists.
        old_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, wf_module, stale_delta_id, old_result
        )

        # Move the step on to the latest delta so the cached result is stale.
        wf_module.last_relevant_delta_id = workflow.last_delta_id
        wf_module.save(update_fields=["last_relevant_delta_id"])

        with arrow_table_context({"A": [2]}) as table2:

            def render(*args, **kwargs):
                return RenderResult(table2)

            # An ERROR-level log record is expected while the step executes.
            with self._stub_module(render), self.assertLogs(level=logging.ERROR):
                coroutine = execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    wf_module,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                )
                self.run_with_async_db(coroutine)

        email_delta.assert_not_called()
Beispiel #11
0
    def test_email_delta(self, email_delta):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        stale_delta_id = workflow.last_delta_id - 1
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=stale_delta_id,
            notifications=True,
        )

        # Cache a result ({"A": [1]}) for the previous delta ...
        old_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(workflow, step, stale_delta_id, old_result)

        # ... then move the step on to the latest delta.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        # The module's render() returns {"A": [2]}, which differs from the
        # cached {"A": [1]}.
        module_code = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code=module_code,
        )

        with self.assertLogs(level=logging.INFO):
            coroutine = execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            self.run_with_async_db(coroutine)

        email_delta.assert_called()
        # The delta is the first positional argument of the recorded call.
        positional_args = email_delta.call_args[0]
        delta = positional_args[0]

        self.assertEqual(delta.user, workflow.owner)
        self.assertEqual(delta.workflow, workflow)
        self.assertEqual(delta.step, step)
Beispiel #12
0
    def test_report_module_error(self):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        # This module's render() calls a name that is not defined.
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code="def render(table, params):\n  undefined()",
        )

        with self.assertLogs(level=logging.INFO):
            coroutine = execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            result = self.run_with_async_db(coroutine)

        # The result should hold exactly one error describing the NameError.
        expected_message = I18nMessage(
            "py.renderer.execute.step.user_visible_bug_during_render",
            {"message": "exit code 1: NameError: name 'undefined' is not defined"},
            None,
        )
        expected = RenderResult(errors=[RenderError(expected_message)])
        assert_render_result_equals(result, expected)
Beispiel #13
0
    def test_resume_without_rerunning_unneeded_renders(self, fake_load_module):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)

        # Settings shared by both steps.
        shared_kwargs = dict(last_relevant_delta_id=delta_id, module_id_name="mod")

        # Step 1 already has a cached result for the current delta.
        wf_module1 = tab.wf_modules.create(order=0, slug="step-1", **shared_kwargs)
        cache_render_result(
            workflow,
            wf_module1,
            delta_id,
            RenderResult(arrow_table({"A": [1]})),
        )

        # Step 2 has no cached result, so it is the one that must be rendered.
        wf_module2 = tab.wf_modules.create(order=1, slug="step-2", **shared_kwargs)

        # Whatever gets loaded is a mock whose render() returns result2.
        result2 = RenderResult(arrow_table({"A": [2]}))
        fake_loaded_module = Mock(LoadedModule)
        fake_loaded_module.migrate_params.return_value = {}
        fake_loaded_module.render.return_value = result2
        fake_load_module.return_value = fake_loaded_module

        self._execute(workflow)
        fake_loaded_module.render.assert_called_once()  # only with module2

        # The freshly rendered result must now be step 2's cached result.
        wf_module2.refresh_from_db()
        crr = wf_module2.cached_render_result
        with open_cached_render_result(crr) as actual:
            assert_render_result_equals(actual, result2)
Beispiel #14
0
    def test_start_row_after_end_row(self):
        five_rows = RenderResult(arrow_table({"A": [0, 1, 2, 3, 4]}))
        cache_render_result(
            self.workflow,
            self.step2,
            self.step2.last_relevant_delta_id,
            five_rows,
        )

        # startrow (3) is greater than endrow (1): expect OK and no rows.
        query = "?startrow=3&endrow=1"
        response = self._request_step(self.step2, query)
        self.assertEqual(response.status_code, status.OK)
        rows = read_streaming_json(response)
        self.assertEqual(rows, [])
Beispiel #15
0
    def test_clear(self):
        cache_render_result(
            self.workflow,
            self.wf_module,
            self.delta.id,
            RenderResult(arrow_table({"A": [1]})),
        )
        # Look the key up before clearing the cached result.
        key = crr_parquet_key(self.wf_module.cached_render_result)
        clear_cached_render_result_for_wf_module(self.wf_module)

        # A fresh copy loaded from the database has no cached render result.
        reloaded = WfModule.objects.get(id=self.wf_module.id)
        self.assertIsNone(reloaded.cached_render_result)

        # Nothing remains stored under the old key.
        still_stored = minio.exists(BUCKET, key)
        self.assertFalse(still_stored)
Beispiel #16
0
    def test_render_arrow_table_empty_output_table_is_empty(self):
        # The param name "arrow_table" is a special case
        def render(arrow_table, params, output_path, **kwargs):
            # Write a table with no columns to output_path.
            empty = pa.table({})
            writer = pa.ipc.RecordBatchFileWriter(output_path, empty.schema)
            with writer:
                writer.write_table(empty)

        with ModuleTestEnv(render=render) as env:
            outcome = env.call_render(make_table(), {})
            expected_result = RenderResult()
            self.assertEqual(outcome.result, expected_result)
            expected_table = make_table()
            self.assertEqual(outcome.read_table(), expected_table)
Beispiel #17
0
def render_arrow(table, params, tab_name, fetch_result: Optional[FetchResult],
                 output_path: Path) -> RenderResult:
    """Build a RenderResult from `fetch_result`.

    Cases, checked in this order:

    1. `fetch_result` is None: return an empty table.
    2. `fetch_result.path` is set and the file has the Parquet magic number:
       delegate to `_render_deprecated_parquet()`.
    3. `fetch_result.errors` is truthy: return an empty table plus those
       errors.
    4. Otherwise: delegate to `_render_file()`.
    """
    if fetch_result is None:
        # Nothing was fetched: empty table.
        return RenderResult(ArrowTable())

    path = fetch_result.path
    if path is not None and parquet.file_has_parquet_magic_number(path):
        # Deprecated files: we used to parse in fetch() and store the result
        # as Parquet. The original file data is lost, and our oldest users
        # still need to be supported.
        return _render_deprecated_parquet(
            path, fetch_result.errors, output_path, params
        )

    if fetch_result.errors:
        # We've never stored errors+data. If there are errors, assume
        # there's no data.
        return RenderResult(ArrowTable(), fetch_result.errors)

    assert not fetch_result.errors  # we've never stored errors+data.
    return _render_file(fetch_result.path, output_path, params)
 def render(self, params: Dict[str, Any],
            fetch_result: Optional[FetchResult]):
     # Generator: yields one RenderResult while the temporary output file
     # (created by tempfile_context) is still open for the consumer.
     with tempfile_context(prefix="output-", suffix=".arrow") as output_path:
         # Call the module-level render() with an empty input table; it
         # returns the errors, each of which is unpacked into I18nMessage().
         raw_errors = render(
             ArrowTable(), params, output_path, fetch_result=fetch_result
         )
         table = ArrowTable.from_arrow_file_with_inferred_metadata(output_path)
         render_errors = [
             RenderError(I18nMessage(*raw_error)) for raw_error in raw_errors
         ]
         yield RenderResult(table, render_errors)
Beispiel #19
0
    def test_clip_out_of_bounds(self):
        two_rows = RenderResult(arrow_table({"A": [0, 1]}))
        cache_render_result(
            self.workflow,
            self.step2,
            self.step2.last_relevant_delta_id,
            two_rows,
        )

        # Indexes outside the table (-1 and 500) should be clipped, so both
        # rows come back.
        query = "?startrow=-1&endrow=500"
        response = self._request_step(self.step2, query)
        self.assertEqual(response.status_code, 200)
        rows = read_streaming_json(response)
        self.assertEqual(rows, [{"A": 0}, {"A": 1}])
Beispiel #20
0
    def test_deprecated_current_table_csv(self):
        table = arrow_table({"A": ["a", "b"]})
        cache_render_result(self.workflow, self.step2, 2, RenderResult(table))

        # Request the CSV through the /public/moduledata/live/ URL.
        url = f"/public/moduledata/live/{self.step2.id}.csv"
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        csv_bytes = b"".join(response.streaming_content)
        self.assertEqual(csv_bytes, b"A\na\nb")
 def test_render_fetch_error(self):
     # NOTE(review): the "error" here is wrapped in RenderResult, not
     # RenderError. The assertions only compare the list for equality, so the
     # test passes either way -- confirm whether RenderError was intended.
     message = I18nMessage("x", {"y": "z"})
     errors = [RenderResult(message)]
     with tempfile_context() as empty_path:
         fetch_result = FetchResult(empty_path, errors)
         result = render_arrow(
             ArrowTable(), P(), "tab-x", fetch_result, self.output_path
         )
     # Expect an empty table and the fetch errors passed through unchanged.
     expected_table = ArrowTable()
     assert_arrow_table_equals(result.table, expected_table)
     self.assertEqual(result.errors, errors)
Beispiel #22
0
    def render(
        self,
        compiled_module: CompiledModule,
        chroot_context: ChrootContext,
        basedir: Path,
        input_table: ArrowTable,
        params: Params,
        tab: Tab,
        fetch_result: Optional[FetchResult],
        output_filename: str,
    ) -> RenderResult:
        """
        Run the module's `render_thrift()` function and return its result.

        `basedir` must be inside the chroot: the module is given the path of
        `basedir` relative to the chroot root, and `output_filename` is the
        file (inside `basedir`) made writable while the module runs.

        Raise ModuleError if the module has a bug. In particular, raise
        ModuleExitedError if the module reports an output filename other than
        `output_filename`, or if its result fails validation (the original
        ValidateError is attached as `__cause__`).
        """
        chroot_dir = chroot_context.chroot.root
        # Translate the host path into the path as seen from inside the chroot.
        basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
        request = ttypes.RenderRequest(
            str(basedir_seen_by_module),
            input_table.to_thrift(),
            params.to_thrift(),
            tab.to_thrift(),
            None if fetch_result is None else fetch_result.to_thrift(),
            output_filename,
        )
        try:
            with chroot_context.writable_file(basedir / output_filename):
                result = self._run_in_child(
                    chroot_dir=chroot_dir,
                    network_config=pyspawner.NetworkConfig(),  # TODO disallow networking
                    compiled_module=compiled_module,
                    timeout=self.render_timeout,
                    result=ttypes.RenderResult(),
                    function="render_thrift",
                    args=[request],
                )
        finally:
            # Always run cleanup, whether or not the child call succeeded.
            chroot_context.clear_unowned_edits()

        if result.table.filename and result.table.filename != output_filename:
            raise ModuleExitedError(0, "Module wrote to wrong output file")

        try:
            # RenderResult.from_thrift() verifies all filenames passed by the
            # module are in the directory the module has access to. It assumes
            # the Arrow file (if there is one) is untrusted, so it can raise
            # ValidateError
            render_result = RenderResult.from_thrift(result, basedir)
        except ValidateError as err:
            # Chain explicitly so the traceback marks the validation failure
            # as the direct cause of the module error.
            raise ModuleExitedError(
                0, "Module produced invalid data: %s" % str(err)
            ) from err
        return render_result
Beispiel #23
0
    def test_wrong_column(self):
        # The cached table only has column "A" ...
        only_column_a = RenderResult(arrow_table({"A": ["a", "b"]}))
        cache_render_result(
            self.workflow,
            self.step1,
            self.step1.last_relevant_delta_id,
            only_column_a,
        )

        # ... so a request for column "B" should be NOT_FOUND.
        response = self._request("B")

        self.assertEqual(response.status_code, status.NOT_FOUND)
        body = json.loads(response.content)
        self.assertEqual(body, {"error": 'column "B" not found'})
Beispiel #24
0
    def test_email_no_delta_when_errors_stay_the_same(self, email_delta):
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        stale_delta_id = workflow.last_delta_id - 1
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=stale_delta_id,
            notifications=True,
        )

        # The cache has to be populated for real: the code under test only
        # tries to open the render result when the database says one exists.
        no_module_error = RenderError(
            I18nMessage("py.renderer.execute.step.noModule", {}, None)
        )
        rendercache.cache_render_result(
            workflow,
            step,
            stale_delta_id,
            RenderResult(errors=[no_module_error]),
        )

        # Move the step on to the latest delta so the cached result is stale.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        coroutine = execute_step(
            self.chroot_context,
            workflow,
            step,
            None,  # module_zipfile
            {},
            Tab(tab.slug, tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        self.run_with_async_db(coroutine)

        email_delta.assert_not_called()  # error is the same error
Beispiel #25
0
    def test_execute_cache_hit(self):
        module_zipfile = create_module_zipfile("mod")
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        # Both steps get a cached result for the workflow's latest delta.
        step1 = tab.wf_modules.create(
            order=0, slug="step-1", last_relevant_delta_id=workflow.last_delta_id
        )
        step1_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, step1, workflow.last_delta_id, step1_result
        )

        step2 = tab.wf_modules.create(
            order=1, slug="step-2", last_relevant_delta_id=workflow.last_delta_id
        )
        step2_result = RenderResult(arrow_table({"B": [2]}))
        rendercache.cache_render_result(
            workflow, step2, workflow.last_delta_id, step2_result
        )

        execute_steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(tab.to_arrow(), execute_steps)

        # Kernel.render is patched to produce a different table ("No"), so a
        # result equal to step 2's cached table shows the cache was used.
        render_patch = patch.object(
            Kernel, "render", side_effect=mock_render({"No": ["bad"]})
        )
        with render_patch, self._execute(workflow, tab_flow, {}) as result:
            expected = RenderResult(arrow_table({"B": [2]}), [])
            assert_render_result_equals(result, expected)
Beispiel #26
0
    def test_current_table_csv(self):
        table = arrow_table({"A": ["a", "b"]})
        cache_render_result(self.workflow, self.step2, 2, RenderResult(table))

        # Request the CSV of step-2's current result table.
        url = f"/workflows/{self.workflow.id}/steps/step-2/current-result-table.csv"
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        csv_bytes = b"".join(response.streaming_content)
        self.assertEqual(csv_bytes, b"A\na\nb")
Beispiel #27
0
    def test_json(self):
        table = arrow_table({"A": ["a", "b"]})
        cache_render_result(self.workflow, self.step2, 2, RenderResult(table))

        # Request tile 0,0 of step-2 at delta 2.
        url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/0,0.json"
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        body = json.loads(response.content)
        self.assertEqual(body, {"rows": [["a"], ["b"]]})
Beispiel #28
0
    def test_tile_row_out_of_bounds(self):
        table = arrow_table({"A": ["a", "b"]})
        cache_render_result(self.workflow, self.step2, 2, RenderResult(table))

        # Request tile 1,0 of the two-row table; expect NOT_FOUND.
        url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/1,0.json"
        response = self.client.get(url)
        self.assertEqual(response.status_code, status.NOT_FOUND)
        body = json.loads(response.content)
        self.assertEqual(body, {"error": "tile out of bounds"})
Beispiel #29
0
    def test_json(self):
        # Cache a result that carries a `json` payload alongside its table.
        payload = {"hello": "world!"}
        result_with_json = RenderResult(
            arrow_table({"A": ["a", "b"]}), json=payload
        )
        cache_render_result(self.workflow, self.step, 1, result_with_json)

        response = self._request()

        # The response body should be that payload.
        self.assertEqual(response.status_code, status.OK)
        body = json.loads(response.content)
        self.assertEqual(body, {"hello": "world!"})
Beispiel #30
0
    def test_wrong_delta_id(self):
        five_rows = RenderResult(arrow_table({"A": [0, 1, 2, 3, 4]}))
        cache_render_result(
            self.workflow,
            self.step2,
            self.step2.last_relevant_delta_id,
            five_rows,
        )

        # Point the step at delta 99 after the result was cached.
        self.step2.last_relevant_delta_id = 99
        self.step2.save(update_fields=["last_relevant_delta_id"])

        # A request for delta 99 should be OK with an empty list.
        response = self._request_slug_delta(self.step2.slug, 99)
        self.assertEqual(response.status_code, status.OK)
        body = json.loads(response.content)
        self.assertEqual(body, [])