def test_execute_cache_hit(self):
        workflow = Workflow.objects.create()
        create_module_zipfile("mod")
        tab = workflow.tabs.create(position=0)
        delta = InitWorkflowCommand.create(workflow)
        wf_module1 = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=delta.id,
        )
        cache_render_result(workflow, wf_module1, delta.id,
                            RenderResult(arrow_table({"A": [1]})))
        wf_module2 = tab.wf_modules.create(
            order=1,
            slug="step-2",
            module_id_name="mod",
            last_relevant_delta_id=delta.id,
        )
        cache_render_result(workflow, wf_module2, delta.id,
                            RenderResult(arrow_table({"B": [2]})))

        with patch.object(Kernel, "render", return_value=None):
            self._execute(workflow)
            Kernel.render.assert_not_called()
    def test_execute_new_revision(self):
        """Re-render and overwrite the cache when the step's delta advances."""
        # NOTE(review): a later method in this class reuses this exact name;
        # Python keeps only the last definition, so the earlier one never runs
        # as a test. Rename one of them to restore coverage.
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        create_module_zipfile(
            "mod",
            spec_kwargs={"loads_data": True},
            python_code=
            'import pandas as pd\ndef render(table, params): return pd.DataFrame({"B": [2]})',
        )
        step = tab.steps.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=1,
            module_id_name="mod",
        )
        # Cache an old result for delta 1 ...
        cache_render_result(workflow, step, 1,
                            RenderResult(arrow_table({"A": [1]})))
        # ... then advance the step's delta so the cached result is stale.
        step.last_relevant_delta_id = 2
        step.save(update_fields=["last_relevant_delta_id"])

        self._execute(workflow)

        step.refresh_from_db()

        # Execution must have re-rendered: the cache now holds {"B": [2]}.
        with open_cached_render_result(step.cached_render_result) as result:
            assert_render_result_equals(result,
                                        RenderResult(arrow_table({"B": [2]})))
    def test_execute_new_revision(self):
        """Re-render and overwrite the cache when a new delta makes it stale."""
        # NOTE(review): this redefines the method name used just above, so it
        # shadows that earlier test -- rename one of them to run both.
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta1 = workflow.last_delta
        create_module_zipfile(
            "mod",
            python_code=
            'import pandas as pd\ndef render(table, params): return pd.DataFrame({"B": [2]})',
        )
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=delta1.id,
            module_id_name="mod",
        )

        # Cache output for delta1 ...
        result1 = RenderResult(arrow_table({"A": [1]}))
        cache_render_result(workflow, wf_module, delta1.id, result1)

        # ... then create delta2 and point the step at it: cache is now stale.
        delta2 = InitWorkflowCommand.create(workflow)
        wf_module.last_relevant_delta_id = delta2.id
        wf_module.save(update_fields=["last_relevant_delta_id"])

        self._execute(workflow)

        wf_module.refresh_from_db()

        # Execution must have re-rendered and replaced the cached result.
        with open_cached_render_result(
                wf_module.cached_render_result) as result:
            assert_render_result_equals(result,
                                        RenderResult(arrow_table({"B": [2]})))
    def test_email_no_delta_when_not_changed(self, email):
        """Send no notification email when a re-render yields identical output."""
        workflow = Workflow.objects.create()
        tab = workflow.tabs.create(position=0)
        # Module renders the same {"A": [1]} that will already be cached.
        create_module_zipfile(
            "mod",
            spec_kwargs={"loads_data": True},
            python_code=
            'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [1]})',
        )
        step = tab.steps.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=1,
            module_id_name="mod",
            notifications=True,
        )
        cache_render_result(workflow, step, 1,
                            RenderResult(arrow_table({"A": [1]})))

        # Make a new delta, so we need to re-render. Give it the same output.
        step.last_relevant_delta_id = 2
        step.save(update_fields=["last_relevant_delta_id"])

        self._execute(workflow)

        email.assert_not_called()
# Beispiel #5
# 0
    def test_email_no_delta_when_not_changed(self, email, fake_load_module):
        """Send no notification email when the re-rendered output is unchanged."""
        workflow = Workflow.objects.create()
        tab = workflow.tabs.create(position=0)
        delta1 = InitWorkflowCommand.create(workflow)
        ModuleVersion.create_or_replace_from_spec({
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": []
        })
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=delta1.id,
            module_id_name="mod",
            notifications=True,
        )
        cache_render_result(workflow, wf_module, delta1.id,
                            RenderResult(arrow_table({"A": [1]})))

        # Make a new delta, so we need to re-render. Give it the same output.
        delta2 = InitWorkflowCommand.create(workflow)
        wf_module.last_relevant_delta_id = delta2.id
        wf_module.save(update_fields=["last_relevant_delta_id"])

        # Stub the module loader: render() returns the same {"A": [1]} that is
        # already cached, so the diff is empty and no email should go out.
        fake_loaded_module = Mock(LoadedModule)
        fake_load_module.return_value = fake_loaded_module
        fake_loaded_module.migrate_params.return_value = {}
        fake_loaded_module.render.return_value = RenderResult(
            arrow_table({"A": [1]}))

        self._execute(workflow)

        email.assert_not_called()
    def test_email_no_delta_when_not_changed(self, email):
        """Re-render with identical output must not trigger an email."""
        workflow = Workflow.objects.create()
        tab = workflow.tabs.create(position=0)
        delta1 = InitWorkflowCommand.create(workflow)
        # The module's output matches what is already cached below.
        create_module_zipfile(
            "mod",
            python_code='import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [1]})',
        )
        step = tab.wf_modules.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=delta1.id,
            module_id_name="mod",
            notifications=True,
        )
        cache_render_result(
            workflow, step, delta1.id, RenderResult(arrow_table({"A": [1]}))
        )

        # Make a new delta, so we need to re-render. Give it the same output.
        delta2 = InitWorkflowCommand.create(workflow)
        step.last_relevant_delta_id = delta2.id
        step.save(update_fields=["last_relevant_delta_id"])

        self._execute(workflow)

        email.assert_not_called()
# Beispiel #7
# 0
def _execute_wfmodule_save(
    workflow: Workflow, wf_module: WfModule, result: RenderResult
) -> SaveResult:
    """
    Call rendercache.cache_render_result() and build notifications.OutputDelta.

    All this runs synchronously within a database lock. (It's a separate
    function so that when we're done awaiting it, we can continue executing in
    a context that doesn't use a database thread.)

    Raise UnneededExecution if the WfModule has changed in the interim.
    """
    # raises UnneededExecution
    with locked_wf_module(workflow, wf_module) as safe_wf_module:
        # Only steps with notifications enabled need the _old_ result: we diff
        # old vs. new below to decide whether to email the owner.
        if safe_wf_module.notifications:
            stale_crr = safe_wf_module.get_stale_cached_render_result()
            if stale_crr is None:
                stale_result = None
            else:
                try:
                    # Read entire old Parquet file, blocking
                    with rendercache.open_cached_render_result(
                        stale_crr
                    ) as stale_result:
                        pass  # stale_result is deleted from disk but still mmapped
                except rendercache.CorruptCacheError:
                    # No, let's not send an email. Corrupt cache probably means
                    # we've been messing with our codebase.
                    logger.exception(
                        "Ignoring CorruptCacheError on workflow %d, wf_module %d because we are about to overwrite it",
                        workflow.id,
                        wf_module.id,
                    )
                    stale_result = None
        else:
            stale_result = None

        # Overwrite the cache with the freshly-rendered result.
        # NOTE(review): this reads last_relevant_delta_id from the caller's
        # wf_module, not safe_wf_module -- presumably locked_wf_module already
        # raised UnneededExecution if they differ; confirm.
        rendercache.cache_render_result(
            workflow, safe_wf_module, wf_module.last_relevant_delta_id, result
        )

        # Email only when there was an old result and the output changed.
        if (
            safe_wf_module.notifications
            and stale_result is not None
            and result != stale_result
        ):
            safe_wf_module.has_unseen_notification = True
            safe_wf_module.save(update_fields=["has_unseen_notification"])
            maybe_delta = notifications.OutputDelta(
                safe_wf_module.workflow.owner,
                safe_wf_module.workflow,
                safe_wf_module,
                stale_result,
                result,
            )
        else:
            maybe_delta = None  # nothing to email
        return SaveResult(safe_wf_module.cached_render_result, maybe_delta)
# Beispiel #8
# 0
    def test_execute_partial_cache_hit(self):
        """Render only the steps whose cached results are stale."""
        module_zipfile = create_module_zipfile("mod")
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        # step1: cached result is fresh. Should not render.
        step1 = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        rendercache.cache_render_result(
            workflow,
            step1,
            workflow.last_delta_id,
            RenderResult(arrow_table({"A": [1]})),
        )
        # step2: cached result is stale, so must be re-rendered
        step2 = tab.wf_modules.create(
            order=1,
            slug="step-2",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id - 1,
        )
        rendercache.cache_render_result(
            workflow,
            step2,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"B": [2]})),
        )
        step2.last_relevant_delta_id = workflow.last_delta_id
        step2.save(update_fields=["last_relevant_delta_id"])

        tab_flow = TabFlow(
            tab.to_arrow(),
            [
                ExecuteStep(step1, module_zipfile, {}),
                ExecuteStep(step2, module_zipfile, {}),
            ],
        )

        with patch.object(Kernel,
                          "render",
                          side_effect=mock_render({"B": [3]})):
            with self._execute(workflow, tab_flow, {}) as result:
                expected = RenderResult(arrow_table({"B": [3]}))
                assert_render_result_equals(result, expected)

            Kernel.render.assert_called_once()  # step2, not step1

            self.assertRegex(
                # Output is to the correct file
                Kernel.render.call_args[1]["output_filename"],
                r"execute-tab-output.*\.arrow",
            )
# Beispiel #9
# 0
    def test_resume_backtrack_on_corrupt_cache_error(self):
        """A corrupt cached result must be discarded and its step re-rendered."""
        module_zipfile = create_module_zipfile("mod")
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        # step1: cached result is fresh -- but CORRUPT
        step1 = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        rendercache.cache_render_result(
            workflow,
            step1,
            workflow.last_delta_id,
            RenderResult(arrow_table({"A": [1]})),
        )
        minio.put_bytes(
            # Write corrupted data -- will lead to CorruptCacheError
            rendercache.io.BUCKET,
            rendercache.io.crr_parquet_key(step1.cached_render_result),
            b"CORRUPT",
        )
        # step2: no cached result -- must re-render
        step2 = tab.wf_modules.create(order=1,
                                      slug="step-2",
                                      module_id_name="mod")

        tab_flow = TabFlow(
            tab.to_arrow(),
            [
                ExecuteStep(step1, module_zipfile, {}),
                ExecuteStep(step2, module_zipfile, {}),
            ],
        )

        with patch.object(Kernel,
                          "render",
                          side_effect=mock_render({"B": [2]})):
            # The corrupt cache is logged at ERROR level, not raised.
            with self._execute(workflow,
                               tab_flow, {},
                               expect_log_level=logging.ERROR) as result:
                expected = RenderResult(arrow_table({"B": [2]}))
                assert_render_result_equals(result, expected)

            self.assertEqual(
                # called with step1, then step2
                Kernel.render.call_count,
                2,
            )
            self.assertRegex(
                # Output is to the correct file
                Kernel.render.call_args[1]["output_filename"],
                r"execute-tab-output.*\.arrow",
            )
# Beispiel #10
# 0
    def test_execute_partial_cache_hit(self, fake_load_module):
        """Render only the step whose cached result is stale (mocked loader)."""
        ModuleVersion.create_or_replace_from_spec(
            {"id_name": "mod", "name": "Mod", "category": "Clean", "parameters": []}
        )
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        # step1: cached result is fresh. Should not render.
        step1 = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        rendercache.cache_render_result(
            workflow,
            step1,
            workflow.last_delta_id,
            RenderResult(arrow_table({"A": [1]})),
        )
        # step2: cached result is stale, so must be re-rendered
        step2 = tab.wf_modules.create(
            order=1,
            slug="step-2",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id - 1,
        )
        rendercache.cache_render_result(
            workflow,
            step2,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"B": [2]})),
        )
        step2.last_relevant_delta_id = workflow.last_delta_id
        step2.save(update_fields=["last_relevant_delta_id"])

        tab_flow = TabFlow(
            tab.to_arrow(),
            [
                ExecuteStep(step1, ParamDType.Dict({}), {}),
                ExecuteStep(step2, ParamDType.Dict({}), {}),
            ],
        )

        expected = RenderResult(arrow_table({"B": [3]}))
        fake_load_module.return_value.render.return_value = expected
        with self._execute(workflow, tab_flow, {}) as result:
            assert_render_result_equals(result, expected)

        fake_load_module.return_value.render.assert_called_once()  # step2, not step1
        self.assertRegex(
            # Output is to the correct file
            fake_load_module.return_value.render.call_args[1]["output_filename"],
            r"execute-tab-output.*\.arrow",
        )
# Beispiel #11
# 0
    def test_resume_backtrack_on_corrupt_cache_error(self, fake_load_module):
        """A corrupt cached result forces a re-render of that step (mocked loader)."""
        ModuleVersion.create_or_replace_from_spec(
            {"id_name": "mod", "name": "Mod", "category": "Clean", "parameters": []}
        )
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        # step1: cached result is fresh -- but CORRUPT
        step1 = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        rendercache.cache_render_result(
            workflow,
            step1,
            workflow.last_delta_id,
            RenderResult(arrow_table({"A": [1]})),
        )
        minio.put_bytes(
            # Write corrupted data -- will lead to CorruptCacheError
            rendercache.io.BUCKET,
            rendercache.io.crr_parquet_key(step1.cached_render_result),
            b"CORRUPT",
        )
        # step2: no cached result -- must re-render
        step2 = tab.wf_modules.create(order=1, slug="step-2", module_id_name="mod")

        tab_flow = TabFlow(
            tab.to_arrow(),
            [
                ExecuteStep(step1, ParamDType.Dict({}), {}),
                ExecuteStep(step2, ParamDType.Dict({}), {}),
            ],
        )

        expected = RenderResult(arrow_table({"B": [2]}))
        fake_load_module.return_value.render.return_value = expected
        # The corrupt cache is logged at ERROR level, not raised.
        with self._execute(
            workflow, tab_flow, {}, expect_log_level=logging.ERROR
        ) as result:
            assert_render_result_equals(result, expected)

        self.assertEqual(
            # called with step1, then step2
            fake_load_module.return_value.render.call_count,
            2,
        )
        self.assertRegex(
            # Output is to the correct file
            fake_load_module.return_value.render.call_args[1]["output_filename"],
            r"execute-tab-output.*\.arrow",
        )
# Beispiel #12
# 0
    def test_email_delta_when_errors_change(self, email_delta):
        """Changed render *errors* also count as new data worth emailing."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            RenderResult(errors=[
                RenderError(
                    I18nMessage("py.renderer.execute.step.noModule", {}, None))
            ]),
        )
        # Advance the step's delta so the cached (error) result is stale.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            # returns different error
            python_code=
            'import pandas as pd\ndef render(table, params): return [{"id": "err"}]',
        )

        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                execute_step(
                    self.chroot_context,
                    workflow,
                    step,
                    module_zipfile,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                ))

        email_delta.assert_called()  # there's new data
# Beispiel #13
# 0
    def test_email_delta_when_stale_crr_is_unreachable(self, email_delta,
                                                       read_cache):
        """When the stale cache has no file to read, skip the diff but still email."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({})),  # does not write a Parquet file
        )
        # Advance the step's delta so the cached result is stale.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            # returns different data
            python_code=
            'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})',
        )

        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                execute_step(
                    self.chroot_context,
                    workflow,
                    step,
                    module_zipfile,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                ))

        read_cache.assert_not_called()  # it would give CorruptCacheError
        email_delta.assert_called()  # there's new data
# Beispiel #14
# 0
    def test_email_delta(self, email_delta):
        """A changed render output must build an OutputDelta for emailing."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # Cache an old result, then advance the delta so it becomes stale.
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"A": [1]})),
        )
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        # The module now renders different data than what is cached.
        module_zipfile = create_module_zipfile(
            "x",
            python_code='import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})',
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    step,
                    module_zipfile,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                )
            )

        email_delta.assert_called()
        delta = email_delta.call_args[0][0]
        # The OutputDelta carries owner, workflow, step and both results.
        self.assertEqual(delta.user, workflow.owner)
        self.assertEqual(delta.workflow, workflow)
        self.assertEqual(delta.wf_module, step)
        self.assertEqual(delta.old_result,
                         RenderResult(arrow_table({"A": [1]})))
        self.assertEqual(delta.new_result,
                         RenderResult(arrow_table({"A": [2]})))
# Beispiel #15
# 0
    def test_email_delta_ignore_corrupt_cache_error(self, email_delta,
                                                    read_cache):
        """A corrupt stale cache must suppress the email, not crash the render."""
        read_cache.side_effect = rendercache.CorruptCacheError
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        rendercache.cache_render_result(
            workflow,
            wf_module,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"A": [1]})),
        )
        # Advance the step's delta so the cached result is stale.
        wf_module.last_relevant_delta_id = workflow.last_delta_id
        wf_module.save(update_fields=["last_relevant_delta_id"])

        module_zipfile = create_module_zipfile(
            "x",
            # returns different data -- but CorruptCacheError means we won't care.
            python_code=
            'import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})',
        )

        # The CorruptCacheError is logged at ERROR level, then ignored.
        with self.assertLogs(level=logging.ERROR):
            self.run_with_async_db(
                execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    wf_module,
                    module_zipfile,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                ))

        email_delta.assert_not_called()
    def test_email_delta(self, email_delta):
        """A changed render output must build an OutputDelta (stubbed module)."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # Cache an old result, then advance the delta so it becomes stale.
        rendercache.cache_render_result(
            workflow,
            wf_module,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"A": [1]})),
        )
        wf_module.last_relevant_delta_id = workflow.last_delta_id
        wf_module.save(update_fields=["last_relevant_delta_id"])

        # Stub a module whose render() returns different data than the cache.
        with arrow_table_context({"A": [2]}) as table2:

            def render(*args, **kwargs):
                return RenderResult(table2)

            with self._stub_module(render):
                self.run_with_async_db(
                    execute_wfmodule(
                        self.chroot_context,
                        workflow,
                        wf_module,
                        {},
                        Tab(tab.slug, tab.name),
                        RenderResult(),
                        {},
                        self.output_path,
                    ))

        email_delta.assert_called()
        delta = email_delta.call_args[0][0]
        # The OutputDelta carries owner, workflow, step and both results.
        self.assertEqual(delta.user, workflow.owner)
        self.assertEqual(delta.workflow, workflow)
        self.assertEqual(delta.wf_module, wf_module)
        self.assertEqual(delta.old_result, RenderResult(arrow_table({"A":
                                                                     [1]})))
        self.assertEqual(delta.new_result, RenderResult(arrow_table({"A":
                                                                     [2]})))
 def test_workflow_view_triggers_render_if_stale_cache(self):
     """GET of a workflow whose cached render is stale must queue a render."""
     step = self.tab1.wf_modules.create(
         order=0,
         slug="step-1",
         last_relevant_delta_id=self.delta.id,
         cached_render_result_delta_id=self.delta.id,  # stale
     )
     # Cache a result
     cache_render_result(
         self.workflow1,
         step,
         self.delta.id,
         RenderResult(arrow_table({"A": ["a"]})),
     )
     # Make the cached result stale. (The view will actually send the
     # stale-result metadata to the client. That's why we cached it.)
     delta2 = InitWorkflowCommand.create(self.workflow1)
     step.last_relevant_delta_id = delta2.id
     step.save(update_fields=["last_relevant_delta_id"])
     self.client.force_login(self.user)
     self.client.get("/workflows/%d/" % self.workflow1.id)
     self.queue_render.assert_called_with(self.workflow1.id, delta2.id)
# Beispiel #18
# 0
    def test_execute_cache_hit(self, fake_module):
        """Every step has a fresh cached result, so the module never runs."""
        workflow = Workflow.objects.create()
        tab = workflow.tabs.create(position=0)
        delta = InitWorkflowCommand.create(workflow)
        # Create two steps, each with a fresh cached render result.
        for order, (slug, table) in enumerate(
                [("step-1", {"A": [1]}), ("step-2", {"B": [2]})]):
            step = tab.wf_modules.create(
                order=order, slug=slug, last_relevant_delta_id=delta.id
            )
            cache_render_result(workflow, step, delta.id,
                                RenderResult(arrow_table(table)))

        self._execute(workflow)

        fake_module.assert_not_called()
    def test_email_delta_ignore_corrupt_cache_error(self, email_delta,
                                                    read_cache):
        """A corrupt stale cache suppresses the email (stubbed-module variant)."""
        read_cache.side_effect = rendercache.CorruptCacheError
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        rendercache.cache_render_result(
            workflow,
            wf_module,
            workflow.last_delta_id - 1,
            RenderResult(arrow_table({"A": [1]})),
        )
        # Advance the step's delta so the cached result is stale.
        wf_module.last_relevant_delta_id = workflow.last_delta_id
        wf_module.save(update_fields=["last_relevant_delta_id"])

        # Stub a module that returns different data -- but the forced
        # CorruptCacheError means the diff (and the email) never happens.
        with arrow_table_context({"A": [2]}) as table2:

            def render(*args, **kwargs):
                return RenderResult(table2)

            with self._stub_module(render):
                with self.assertLogs(level=logging.ERROR):
                    self.run_with_async_db(
                        execute_wfmodule(
                            self.chroot_context,
                            workflow,
                            wf_module,
                            {},
                            Tab(tab.slug, tab.name),
                            RenderResult(),
                            {},
                            self.output_path,
                        ))

        email_delta.assert_not_called()
# Beispiel #20
# 0
    def test_load_input_cached_render_result(self):
        """load_database_objects must expose the previous step's cached result."""
        with arrow_table_context({"A": [1]}) as input_table:
            input_render_result = RenderResult(input_table)

            workflow = Workflow.create_and_init()
            tab = workflow.tabs.first()
            step1 = tab.steps.create(
                order=0,
                slug="step-1",
                last_relevant_delta_id=workflow.last_delta_id,
            )
            step2 = tab.steps.create(order=1, slug="step-2")
            rendercache.cache_render_result(
                workflow, step1, workflow.last_delta_id, input_render_result
            )

            result = self.run_with_async_db(
                fetch.load_database_objects(workflow.id, step2.id)
            )

            input_crr = step1.cached_render_result
            assert input_crr is not None
            # The cached result is reachable both by position and by name.
            self.assertEqual(result[4], input_crr)
            self.assertEqual(result.input_cached_render_result, input_crr)
# Beispiel #21
# 0
    def test_resume_without_rerunning_unneeded_renders(self, fake_load_module):
        """Only steps lacking a fresh cached result are rendered."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id
        ModuleVersion.create_or_replace_from_spec({
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": []
        })

        # wf_module1: has a valid, cached result
        wf_module1 = tab.wf_modules.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=delta_id,
            module_id_name="mod",
        )
        cache_render_result(workflow, wf_module1, delta_id,
                            RenderResult(arrow_table({"A": [1]})))

        # wf_module2: has no cached result (must be rendered)
        wf_module2 = tab.wf_modules.create(
            order=1,
            slug="step-2",
            last_relevant_delta_id=delta_id,
            module_id_name="mod",
        )

        fake_loaded_module = Mock(LoadedModule)
        fake_loaded_module.migrate_params.return_value = {}
        fake_load_module.return_value = fake_loaded_module
        result2 = RenderResult(arrow_table({"A": [2]}))

        fake_loaded_module.render.return_value = result2
        self._execute(workflow)
        fake_loaded_module.render.assert_called_once()  # only with module2

        # wf_module2's new result must now be in the cache.
        wf_module2.refresh_from_db()
        with open_cached_render_result(
                wf_module2.cached_render_result) as actual:
            assert_render_result_equals(actual, result2)
 def test_workflow_view_triggers_render_if_stale_cache(self):
     """GET on the workflow page queues a render when the cache is stale."""
     step = self.tab1.steps.create(
         order=0,
         slug="step-1",
         last_relevant_delta_id=1,
         cached_render_result_delta_id=1,
     )
     # Cache a result at delta 1 ...
     cache_render_result(
         self.workflow1,
         step,
         1,
         RenderResult(arrow_table({"A": ["a"]})),
     )
     # ... then advance the step's relevant delta so the cache is stale.
     step.last_relevant_delta_id = 2
     step.save(update_fields=["last_relevant_delta_id"])
     self.client.force_login(self.user)
     self.client.get("/workflows/%d/" % self.workflow1.id)
     # The view must ask for a render at the workflow's latest delta.
     self.queue_render.assert_called_with(self.workflow1.id,
                                          self.workflow1.last_delta_id)
Beispiel #23
0
    def test_email_no_delta_when_errors_stay_the_same(self, email_delta):
        """No notification email when a re-render yields the same errors."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id - 1,
            notifications=True,
        )
        # We need to actually populate the cache to set up the test. The code
        # under test will only try to open the render result if the database
        # says there's something there.
        rendercache.cache_render_result(
            workflow,
            step,
            workflow.last_delta_id - 1,
            RenderResult(errors=[
                RenderError(
                    I18nMessage("py.renderer.execute.step.noModule", {}, None))
            ]),
        )
        # Mark the cached result stale so execute_step() re-renders.
        step.last_relevant_delta_id = workflow.last_delta_id
        step.save(update_fields=["last_relevant_delta_id"])

        # Re-render with no module_zipfile: presumably this produces the same
        # "noModule" error as the cached result (see assertion comment below).
        self.run_with_async_db(
            execute_step(
                self.chroot_context,
                workflow,
                step,
                None,  # module_zipfile
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            ))

        email_delta.assert_not_called()  # error is the same error
Beispiel #24
0
    def test_execute_cache_hit(self):
        """When every step has a fresh cached result, the kernel never runs."""
        module_zipfile = create_module_zipfile("mod")
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id

        step1 = tab.wf_modules.create(
            order=0, slug="step-1", last_relevant_delta_id=delta_id
        )
        rendercache.cache_render_result(
            workflow, step1, delta_id, RenderResult(arrow_table({"A": [1]}))
        )
        step2 = tab.wf_modules.create(
            order=1, slug="step-2", last_relevant_delta_id=delta_id
        )
        rendercache.cache_render_result(
            workflow, step2, delta_id, RenderResult(arrow_table({"B": [2]}))
        )

        steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(tab.to_arrow(), steps)

        # If the kernel did render, the output would be {"No": ["bad"]};
        # the assertion below would then fail.
        with patch.object(
            Kernel, "render", side_effect=mock_render({"No": ["bad"]})
        ):
            with self._execute(workflow, tab_flow, {}) as result:
                assert_render_result_equals(
                    result, RenderResult(arrow_table({"B": [2]}), [])
                )
    def test_resume_without_rerunning_unneeded_renders(self):
        """Only steps lacking a fresh cached result should be rendered."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id
        create_module_zipfile(
            # If this runs on step1, it'll return pd.DataFrame().
            # If this runs on step2, it'll return step1-output * 2 -- so
            # step2's output reveals whether step1 was (wrongly) re-rendered.
            "mod",
            spec_kwargs={"loads_data": True},
            python_code="def render(table, params): return table * 2",
        )

        # step1 already has a fresh cached result.
        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=1,
            module_id_name="mod",
        )
        cache_render_result(
            workflow, step1, 1, RenderResult(arrow_table({"A": [1]}))
        )

        # step2 has nothing cached, so it must be rendered.
        step2 = tab.steps.create(
            order=1,
            slug="step-2",
            last_relevant_delta_id=1,
            module_id_name="mod",
        )

        self._execute(workflow)

        step2.refresh_from_db()
        with open_cached_render_result(step2.cached_render_result) as actual:
            # {"A": [2]} is the cached {"A": [1]} doubled: step1's cache was
            # fed to step2 without re-running the module on step1.
            assert_render_result_equals(
                actual, RenderResult(arrow_table({"A": [2]}))
            )
Beispiel #26
0
    def test_execute_new_revision(self, fake_load_module):
        """A cached result at an older delta is stale and forces a re-render."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta1 = workflow.last_delta
        ModuleVersion.create_or_replace_from_spec(
            {
                "id_name": "mod",
                "name": "Mod",
                "category": "Clean",
                "parameters": [],
            }
        )
        step = tab.wf_modules.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=delta1.id,
            module_id_name="mod",
        )

        # Cache a result at delta1 ...
        stale_result = RenderResult(arrow_table({"A": [1]}))
        cache_render_result(workflow, step, delta1.id, stale_result)

        # ... then move the step to delta2, making that cache stale.
        delta2 = InitWorkflowCommand.create(workflow)
        step.last_relevant_delta_id = delta2.id
        step.save(update_fields=["last_relevant_delta_id"])

        fresh_result = RenderResult(arrow_table({"B": [2]}))
        loaded_module = Mock(LoadedModule)
        loaded_module.migrate_params.return_value = {}
        fake_load_module.return_value = loaded_module
        loaded_module.render.return_value = fresh_result

        self._execute(workflow)

        step.refresh_from_db()

        # The cache must now hold the freshly-rendered result.
        with open_cached_render_result(step.cached_render_result) as result:
            assert_render_result_equals(result, fresh_result)
Beispiel #27
0
    def test_execute_cache_hit(self, fake_module):
        """With all caches fresh, the module function is never invoked."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id

        step1 = tab.wf_modules.create(
            order=0, slug="step-1", last_relevant_delta_id=delta_id
        )
        rendercache.cache_render_result(
            workflow, step1, delta_id, RenderResult(arrow_table({"A": [1]}))
        )
        step2 = tab.wf_modules.create(
            order=1, slug="step-2", last_relevant_delta_id=delta_id
        )
        rendercache.cache_render_result(
            workflow, step2, delta_id, RenderResult(arrow_table({"B": [2]}))
        )

        tab_flow = TabFlow(
            tab.to_arrow(),
            [
                ExecuteStep(step1, ParamDType.Dict({}), {}),
                ExecuteStep(step2, ParamDType.Dict({}), {}),
            ],
        )

        # Execution should surface the final step's cached output directly.
        with self._execute(workflow, tab_flow, {}) as result:
            assert_render_result_equals(
                result, RenderResult(arrow_table({"B": [2]}), [])
            )

        fake_module.assert_not_called()
Beispiel #28
0
def _execute_step_save(
    workflow: Workflow, step: Step, result: LoadedRenderResult
) -> SaveResult:
    """Call rendercache.cache_render_result() and build notifications.OutputDelta.

    All this runs synchronously within a database lock. (It's a separate
    function so that when we're done awaiting it, we can continue executing in
    a context that doesn't use a database thread.)

    An OutputDelta is built only when the step has notifications enabled, the
    workflow has an owner, and the fresh result differs from the stale one.

    Raise UnneededExecution if the Step has changed in the interim.
    """
    # raises UnneededExecution
    with contextlib.ExitStack() as exit_stack:
        safe_step = exit_stack.enter_context(locked_step(workflow, step))
        # Only snapshot the stale result when we might email a diff:
        # notifications must be enabled and the workflow must have an owner.
        if safe_step.notifications and workflow.owner_id is not None:
            stale_crr = safe_step.get_stale_cached_render_result()
            if stale_crr is None:
                stale_parquet_file = None
            elif stale_crr.status == "ok":
                try:
                    # Hold the download open via exit_stack so we can compare
                    # it with the fresh file after we overwrite the cache.
                    stale_parquet_file = exit_stack.enter_context(
                        rendercache.downloaded_parquet_file(stale_crr)
                    )
                except rendercache.CorruptCacheError:
                    # No, let's not send an email. Corrupt cache probably means
                    # we've been messing with our codebase.
                    logger.exception(
                        "Ignoring CorruptCacheError on workflow %d, step %d because we are about to overwrite it",
                        workflow.id,
                        step.id,
                    )
                    stale_crr = None
                    stale_parquet_file = None
            else:
                # status is 'error'/'unreachable'. There's no Parquet file.
                stale_parquet_file = None
        else:
            stale_crr = None
            stale_parquet_file = None

        # NOTE(review): this reads step.last_relevant_delta_id off the
        # caller's instance, not safe_step's -- confirm the two cannot
        # diverge under the lock.
        rendercache.cache_render_result(
            workflow, safe_step, step.last_relevant_delta_id, result
        )

        is_changed = False  # nothing to email, usually
        if stale_crr is not None:
            fresh_crr = safe_step.cached_render_result

            if (
                fresh_crr.status != stale_crr.status
                or fresh_crr.errors != stale_crr.errors
                or fresh_crr.json != stale_crr.json
                or fresh_crr.table_metadata != stale_crr.table_metadata
            ):
                # Output other than table data has changed (e.g., nRows)
                is_changed = True

            if not is_changed and fresh_crr.status == "ok":
                # Download the new parquet file and compare to the old one
                fresh_parquet_file = exit_stack.enter_context(
                    rendercache.downloaded_parquet_file(fresh_crr)
                )
                is_changed = not cjwparquet.are_files_equal(
                    stale_parquet_file, fresh_parquet_file
                )

        if is_changed:
            with connection.cursor() as cursor:
                # Don't import cjworkbench.models.userprofile: it relies on
                # settings.FREE_TIER_USAGE_LIMITS, but renderer doesn't set it.
                #
                # TODO nix django-ORM.
                cursor.execute(
                    """
                    SELECT locale_id
                    FROM cjworkbench_userprofile
                    WHERE user_id = %s
                    """,
                    [safe_step.workflow.owner_id],
                )
                locale_id = cursor.fetchone()[0]
            maybe_delta = notifications.OutputDelta(
                user=safe_step.workflow.owner,
                workflow=safe_step.workflow,
                step=safe_step,
                locale_id=locale_id,
            )
        else:
            maybe_delta = None

        return SaveResult(safe_step.cached_render_result, maybe_delta)