Beispiel #1
0
    def test_render_without_input_or_loads_data_raises_no_loaded_data(self):
        # A module declared with loads_data=False, executed with an empty
        # RenderResult() among its arguments, should yield a result whose only
        # error is NoLoadedDataError.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        new_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
        )
        module_zip = create_module_zipfile(
            "x",
            python_code="def render(table, params): return None",
            spec_kwargs={"loads_data": False},
        )

        # Positional arguments, in the order execute_step() receives them.
        step_args = (
            self.chroot_context,
            wf,
            new_step,
            module_zip,
            {},
            Tab(first_tab.slug, first_tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        actual = self.run_with_async_db(execute_step(*step_args))

        no_loaded_data = RenderError(
            I18nMessage("py.renderer.execute.step.NoLoadedDataError", {}, None)
        )
        assert_render_result_equals(actual, RenderResult(errors=[no_loaded_data]))
Beispiel #2
0
    def test_deleted_module(self):
        # Executing a step with module_zipfile=None should produce no columns,
        # leave an empty output file, and cache a "noModule" error on the step.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        orphan_step = first_tab.steps.create(
            module_id_name="deleted_module",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
        )
        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=orphan_step,
            module_zipfile=None,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        result = self.run_with_async_db(execute_step(**call_kwargs))

        self.assertEqual(result.columns, [])
        written = self.output_path.read_bytes()
        self.assertEqual(written, b"")

        # Reload the step so the cached render result reflects the database.
        orphan_step.refresh_from_db()
        no_module = RenderError(
            I18nMessage("py.renderer.execute.step.noModule", {}, None)
        )
        self.assertEqual(orphan_step.cached_render_result.errors, [no_module])
Beispiel #3
0
    def test_fetch_result_no_stored_object_means_none(self):
        # No stored object is created for the step, so the module's render()
        # asserts (inside the module code) that fetch_result is None.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        new_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
        )

        render_source = textwrap.dedent("""
                import pandas as pd
                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result is None
                    return pd.DataFrame()
                """)
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        # Positional arguments, in the order execute_step() receives them.
        step_args = (
            self.chroot_context,
            wf,
            new_step,
            module_zip,
            {},
            Tab(first_tab.slug, first_tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        # The only assertion made here: something is logged at INFO or above.
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(*step_args))
Beispiel #4
0
 def test_deleted_module(self):
     # With no module zipfile (None), execute_step() should return -- and
     # cache on the step -- a RenderResult holding only a "noModule" error.
     wf = Workflow.create_and_init()
     first_tab = wf.tabs.first()
     orphan_step = first_tab.steps.create(
         module_id_name="deleted_module",
         slug="step-1",
         order=0,
         last_relevant_delta_id=wf.last_delta_id,
     )
     # Positional arguments, in the order execute_step() receives them.
     step_args = (
         self.chroot_context,
         wf,
         orphan_step,
         None,
         {},
         first_tab.to_arrow(),
         RenderResult(),
         {},
         self.output_path,
     )
     actual = self.run_with_async_db(execute_step(*step_args))

     no_module = RenderError(
         I18nMessage("py.renderer.execute.step.noModule", {}, None)
     )
     wanted = RenderResult(errors=[no_module])
     assert_render_result_equals(actual, wanted)

     # The same errors must also have been persisted on the step.
     orphan_step.refresh_from_db()
     self.assertEqual(orphan_step.cached_render_result.errors, wanted.errors)
Beispiel #5
0
    def test_fetch_result_happy_path(self):
        # The step carries fetch errors plus a stored Parquet object; the
        # module code itself asserts that render() sees both via fetch_result.
        # NOTE(review): this test only checks that something is logged at INFO
        # and never inspects the render result -- confirm that a failing assert
        # inside render() would actually make this test fail.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        step_fetch_errors = [
            RenderError(I18nMessage("foo", {}, "module")),
            RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
        ]
        fetched_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
            fetch_errors=step_fetch_errors,
        )
        with parquet_file({"A": [1]}) as parquet_path:
            stored_object = create_stored_object(wf.id, fetched_step.id, parquet_path)
        # Point the step at the stored object that was just created.
        fetched_step.stored_data_version = stored_object.stored_at
        fetched_step.save(update_fields=["stored_data_version"])

        render_source = textwrap.dedent(
                """
                import pyarrow as pa
                import pandas as pd
                from pandas.testing import assert_frame_equal
                from cjwkernel.types import RenderError, I18nMessage

                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result.errors == [
                        RenderError(I18nMessage("foo", {}, "module")),
                        RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
                    ]
                    fetch_dataframe = pa.parquet.read_table(str(fetch_result.path))
                    assert_frame_equal(fetch_dataframe, pd.DataFrame({"A": [1]}))
                    return pd.DataFrame()
                """
        )
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=fetched_step,
            module_zipfile=module_zip,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**call_kwargs))
Beispiel #6
0
    def test_report_module_error(self):
        # A module whose render() raises (here: a NameError from calling an
        # undefined name) should still yield a result from execute_step(): no
        # columns, an empty output file, and a user-visible error cached on
        # the step.
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code="def render(table, params):\n  undefined()",
        )

        # The module failure is expected to be logged (INFO or above).
        with self.assertLogs(level=logging.INFO):
            result = self.run_with_async_db(
                execute_step(
                    chroot_context=self.chroot_context,
                    workflow=workflow,
                    step=step,
                    module_zipfile=module_zipfile,
                    params={},
                    tab_name=tab.name,
                    input_path=self.empty_table_path,
                    input_table_columns=[],
                    tab_results={},
                    output_path=self.output_path,
                )
            )
        # Use assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(result.columns, [])
        self.assertEqual(self.output_path.read_bytes(), b"")

        # Reload the step so the cached render result reflects the database.
        step.refresh_from_db()
        self.assertEqual(
            step.cached_render_result.errors,
            [
                RenderError(
                    I18nMessage(
                        "py.renderer.execute.step.user_visible_bug_during_render",
                        {
                            "message": "exit code 1: NameError: name 'undefined' is not defined"
                        },
                        None,
                    )
                )
            ],
        )
Beispiel #7
0
    def test_email_delta_when_errors_change(self, email_delta):
        # The cached (older-delta) result holds a "noModule" error; the new
        # render returns a different error, so email_delta must be called.
        owner = create_test_user()
        wf = Workflow.create_and_init(owner_id=owner.id)
        first_tab = wf.tabs.first()
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
            notifications=True,
        )
        # The cache has to be populated for real: the code under test only
        # opens the render result when the database says one exists.
        previous_errors = [
            RenderError(I18nMessage("py.renderer.execute.step.noModule", {}, None))
        ]
        stale_delta_id = wf.last_delta_id - 1
        write_to_rendercache(
            wf,
            notified_step,
            stale_delta_id,
            table=make_table(),
            errors=previous_errors,
        )

        # This module reports an error that differs from the cached one.
        render_source = 'import pandas as pd\ndef render(table, params): return [{"id": "err"}]'
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=notified_step,
            module_zipfile=module_zip,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**call_kwargs))

        # The error changed, which counts as new data.
        email_delta.assert_called()
Beispiel #8
0
    def test_email_delta_when_errors_change(self, email_delta):
        # A "noModule" error is cached at the previous delta, then the step is
        # bumped to the current delta; the new render returns a different
        # error, so email_delta must be called.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        previous_delta_id = wf.last_delta_id - 1
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=previous_delta_id,
            notifications=True,
        )
        # The cache has to be populated for real: the code under test only
        # opens the render result when the database says one exists.
        no_module = RenderError(
            I18nMessage("py.renderer.execute.step.noModule", {}, None)
        )
        rendercache.cache_render_result(
            wf,
            notified_step,
            wf.last_delta_id - 1,
            RenderResult(errors=[no_module]),
        )
        # Move the step forward to the workflow's current delta.
        notified_step.last_relevant_delta_id = wf.last_delta_id
        notified_step.save(update_fields=["last_relevant_delta_id"])

        # This module reports an error that differs from the cached one.
        render_source = 'import pandas as pd\ndef render(table, params): return [{"id": "err"}]'
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        # Positional arguments, in the order execute_step() receives them.
        step_args = (
            self.chroot_context,
            wf,
            notified_step,
            module_zip,
            {},
            Tab(first_tab.slug, first_tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(*step_args))

        # The error changed, which counts as new data.
        email_delta.assert_called()
Beispiel #9
0
    def test_email_delta_when_stale_crr_is_unreachable(self, email_delta, read_cache):
        # The previously cached result is built from an empty arrow table; the
        # new render returns real data. The old cache must not be read, and
        # email_delta must be called.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        previous_delta_id = wf.last_delta_id - 1
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=previous_delta_id,
            notifications=True,
        )
        # The cache has to be populated for real: the code under test only
        # opens the render result when the database says one exists.
        # Caching this empty-table result does not write a Parquet file.
        empty_result = RenderResult(arrow_table({}))
        rendercache.cache_render_result(
            wf,
            notified_step,
            wf.last_delta_id - 1,
            empty_result,
        )
        # Move the step forward to the workflow's current delta.
        notified_step.last_relevant_delta_id = wf.last_delta_id
        notified_step.save(update_fields=["last_relevant_delta_id"])

        # This module returns data that differs from the cached result.
        render_source = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        # Positional arguments, in the order execute_step() receives them.
        step_args = (
            self.chroot_context,
            wf,
            notified_step,
            module_zip,
            {},
            Tab(first_tab.slug, first_tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(*step_args))

        # Reading the old cache would give CorruptCacheError, so it is skipped.
        read_cache.assert_not_called()
        # There is new data, so the notification goes out.
        email_delta.assert_called()
Beispiel #10
0
    def test_email_delta_ignore_corrupt_cache_error(self, email_delta, read_cache):
        # read_cache is rigged to raise CorruptCacheError. Although the new
        # render differs from the cached table, no email may be sent, and
        # something must be logged at ERROR.
        owner = create_test_user()
        wf = Workflow.create_and_init(owner_id=owner.id)
        first_tab = wf.tabs.first()
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
            notifications=True,
        )
        # The cache has to be populated for real: the code under test only
        # opens the render result when the database says one exists.
        stale_delta_id = wf.last_delta_id - 1
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(wf, notified_step, stale_delta_id, cached_table)
        # Set only after the cache write, as in the original ordering.
        read_cache.side_effect = rendercache.CorruptCacheError

        # The data differs from the cache, but CorruptCacheError means the
        # difference is never examined.
        render_source = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=notified_step,
            module_zipfile=module_zip,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.ERROR):
            self.run_with_async_db(execute_step(**call_kwargs))

        email_delta.assert_not_called()
Beispiel #11
0
    def test_email_delta(self, email_delta):
        # The cached result ({"A": [1]}) differs from the new render
        # ({"A": [2]}): email_delta must be called with a delta that names the
        # workflow owner, the workflow and the step.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        previous_delta_id = wf.last_delta_id - 1
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=previous_delta_id,
            notifications=True,
        )
        old_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            wf,
            notified_step,
            wf.last_delta_id - 1,
            old_result,
        )
        # Move the step forward to the workflow's current delta.
        notified_step.last_relevant_delta_id = wf.last_delta_id
        notified_step.save(update_fields=["last_relevant_delta_id"])

        render_source = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )
        # Positional arguments, in the order execute_step() receives them.
        step_args = (
            self.chroot_context,
            wf,
            notified_step,
            module_zip,
            {},
            Tab(first_tab.slug, first_tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(*step_args))

        email_delta.assert_called()
        # First positional argument of the most recent call.
        positional_args = email_delta.call_args[0]
        sent_delta = positional_args[0]

        self.assertEqual(sent_delta.user, wf.owner)
        self.assertEqual(sent_delta.workflow, wf)
        self.assertEqual(sent_delta.step, notified_step)
Beispiel #12
0
    def test_email_delta(self, email_delta):
        # The cached older-delta table ({"A": [1]}) differs from the new render
        # ({"A": [2]}): email_delta must be called with a delta that names the
        # workflow owner, the workflow and the step.
        owner = create_test_user()
        wf = Workflow.create_and_init(owner_id=owner.id)
        first_tab = wf.tabs.first()
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
            notifications=True,
        )
        stale_delta_id = wf.last_delta_id - 1
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(wf, notified_step, stale_delta_id, cached_table)

        render_source = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )
        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=notified_step,
            module_zipfile=module_zip,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**call_kwargs))

        email_delta.assert_called()
        # First positional argument of the most recent call.
        positional_args = email_delta.call_args[0]
        sent_delta = positional_args[0]

        self.assertEqual(sent_delta.user, wf.owner)
        self.assertEqual(sent_delta.workflow, wf)
        self.assertEqual(sent_delta.step, notified_step)
Beispiel #13
0
    def test_fetch_result_deleted_file_means_none(self):
        # The step's stored object still exists in the database but its file
        # has been removed from S3; the module code asserts that render() then
        # receives fetch_result=None.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        fetched_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
        )
        with parquet_file({"A": [1]}) as parquet_path:
            stored_object = create_stored_object(wf.id, fetched_step.id, parquet_path)
        fetched_step.stored_data_version = stored_object.stored_at
        fetched_step.save(update_fields=["stored_data_version"])
        # Remove the file from S3 while the DB row keeps pointing at it.
        s3.remove(s3.StoredObjectsBucket, stored_object.key)

        render_source = textwrap.dedent(
                """
                import pandas as pd
                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result is None
                    return pd.DataFrame()
                """
        )
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=fetched_step,
            module_zipfile=module_zip,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        # The only assertion made here: something is logged at INFO or above.
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**call_kwargs))
Beispiel #14
0
    def test_report_module_error(self):
        # A module whose render() calls an undefined name should produce a
        # RenderResult carrying a single user-visible "bug during render"
        # error that quotes the NameError.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        broken_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
        )

        module_zip = create_module_zipfile(
            "x",
            python_code="def render(table, params):\n  undefined()",
            spec_kwargs={"loads_data": True},
        )

        # Positional arguments, in the order execute_step() receives them.
        step_args = (
            self.chroot_context,
            wf,
            broken_step,
            module_zip,
            {},
            Tab(first_tab.slug, first_tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            actual = self.run_with_async_db(execute_step(*step_args))

        bug_message = I18nMessage(
            "py.renderer.execute.step.user_visible_bug_during_render",
            {"message": "exit code 1: NameError: name 'undefined' is not defined"},
            None,
        )
        wanted = RenderResult(errors=[RenderError(bug_message)])
        assert_render_result_equals(actual, wanted)
Beispiel #15
0
    def test_fetch_result_no_bucket_or_key_stored_object_means_none(self):
        # The step points at a stored object whose key is the empty string;
        # the module code asserts that render() then gets fetch_result=None.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        fetched_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
            stored_data_version=datetime.datetime.now(),
        )
        # Stored-object row matching the step's version, but with an empty key.
        fetched_step.stored_objects.create(
            key="",
            size=0,
            hash="whatever",
            stored_at=fetched_step.stored_data_version,
        )

        render_source = textwrap.dedent(
                """
                import pandas as pd
                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result is None
                    return pd.DataFrame()
                """
        )
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=fetched_step,
            module_zipfile=module_zip,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        # The only assertion made here: something is logged at INFO or above.
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**call_kwargs))
Beispiel #16
0
    def test_email_delta_when_fresh_crr_is_unreachable(self, email_delta):
        # The cached table has data, while the new render returns an empty
        # DataFrame ("unreachable"); that change must still trigger
        # email_delta.
        owner = create_test_user()
        wf = Workflow.create_and_init(owner_id=owner.id)
        first_tab = wf.tabs.first()
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
            notifications=True,
        )
        previous_delta_id = wf.last_delta_id - 1
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(wf, notified_step, previous_delta_id, cached_table)

        # An empty result -- meaning, "unreachable".
        render_source = "import pandas as pd\ndef render(table, params): return pd.DataFrame({})"
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=notified_step,
            module_zipfile=module_zip,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**call_kwargs))

        # There's new data -- or, well, non-data.
        email_delta.assert_called()
Beispiel #17
0
    def test_render_without_input_or_loads_data_raises_no_loaded_data(self):
        # A module declared with loads_data=False, run with no input columns,
        # should render nothing and cache a NoLoadedDataError on the step.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        new_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
        )
        module_zip = create_module_zipfile(
            "x",
            python_code="def render(table, params): return None",
            spec_kwargs={"loads_data": False},
        )

        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=new_step,
            module_zipfile=module_zip,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        result = self.run_with_async_db(execute_step(**call_kwargs))

        self.assertEqual(result.columns, [])
        written = self.output_path.read_bytes()
        self.assertEqual(written, b"")

        # Reload the step so the cached render result reflects the database.
        new_step.refresh_from_db()
        no_loaded_data = RenderError(
            I18nMessage("py.renderer.execute.step.NoLoadedDataError", {}, None)
        )
        self.assertEqual(new_step.cached_render_result.errors, [no_loaded_data])
Beispiel #18
0
    def test_email_no_delta_when_errors_stay_the_same(self, email_delta):
        # The cache already holds a "noModule" error, and executing with
        # module_zipfile=None yields that same error: no email may be sent.
        owner = create_test_user()
        wf = Workflow.create_and_init(owner_id=owner.id)
        first_tab = wf.tabs.first()
        # NOTE(review): the step is created at last_delta_id - 1, the same
        # delta the cache entry is written at, yet the original comment calls
        # that entry "stale" -- confirm which delta the step should carry.
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id - 1,
            notifications=True,
        )
        # The cache has to be populated for real: the code under test only
        # opens the render result when the database says one exists.
        previous_errors = [
            RenderError(I18nMessage("py.renderer.execute.step.noModule", {}, None))
        ]
        cached_delta_id = wf.last_delta_id - 1
        write_to_rendercache(
            wf,
            notified_step,
            cached_delta_id,
            table=make_table(),
            errors=previous_errors,
        )

        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=notified_step,
            module_zipfile=None,  # will cause noModule error
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        self.run_with_async_db(execute_step(**call_kwargs))

        # The error is unchanged, so there is nothing to notify about.
        email_delta.assert_not_called()
Beispiel #19
0
    def test_email_no_delta_when_errors_stay_the_same(self, email_delta):
        # A "noModule" error is cached at the previous delta, the step is then
        # bumped to the current delta, and the re-render (no module zipfile)
        # yields the same error: no email may be sent.
        wf = Workflow.create_and_init()
        first_tab = wf.tabs.first()
        previous_delta_id = wf.last_delta_id - 1
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=previous_delta_id,
            notifications=True,
        )
        # The cache has to be populated for real: the code under test only
        # opens the render result when the database says one exists.
        no_module = RenderError(
            I18nMessage("py.renderer.execute.step.noModule", {}, None)
        )
        rendercache.cache_render_result(
            wf,
            notified_step,
            wf.last_delta_id - 1,
            RenderResult(errors=[no_module]),
        )
        # Move the step forward to the workflow's current delta.
        notified_step.last_relevant_delta_id = wf.last_delta_id
        notified_step.save(update_fields=["last_relevant_delta_id"])

        # Positional arguments, in the order execute_step() receives them.
        step_args = (
            self.chroot_context,
            wf,
            notified_step,
            None,  # module_zipfile
            {},
            Tab(first_tab.slug, first_tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        self.run_with_async_db(execute_step(*step_args))

        # The error is unchanged, so there is nothing to notify about.
        email_delta.assert_not_called()
Beispiel #20
0
    def test_email_delta_when_stale_crr_is_unreachable(self, email_delta, read_cache):
        # The older cached result is an empty table ("unreachable"); the new
        # render returns real data. The old cache must not be read, and
        # email_delta must be called.
        owner = create_test_user()
        wf = Workflow.create_and_init(owner_id=owner.id)
        first_tab = wf.tabs.first()
        notified_step = first_tab.steps.create(
            module_id_name="x",
            slug="step-1",
            order=0,
            last_relevant_delta_id=wf.last_delta_id,
            notifications=True,
        )
        # "unreachable" previous step
        previous_delta_id = wf.last_delta_id - 1
        empty_table = make_table()
        write_to_rendercache(wf, notified_step, previous_delta_id, empty_table)

        # This module returns data that differs from the cached result.
        render_source = 'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})'
        module_zip = create_module_zipfile(
            "x",
            python_code=render_source,
            spec_kwargs={"loads_data": True},
        )

        call_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=wf,
            step=notified_step,
            module_zipfile=module_zip,
            params={},
            tab_name=first_tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**call_kwargs))

        # Reading the old cache would give CorruptCacheError, so it is skipped.
        read_cache.assert_not_called()
        # There is new data, so the notification goes out.
        email_delta.assert_called()