Esempio n. 1
0
 def test_clean_tab_omit_unused_tabs_from_tab_outputs(self):
     # Three tab results are supplied, but the params only select "tab-1",
     # so tab_outputs should contain "tab-1" and nothing else.
     schema = ParamSchema.Dict({"x": ParamSchema.Tab()})
     params = {"x": "tab-1"}
     tab_results = {
         Tab("tab-1", "Tab 1"): StepResult(Path("tab-1.arrow"), [TEXT("A")]),
         Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [TEXT("A")]),
         Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [TEXT("A")]),
     }

     result = self._call_prep_params(schema, params, tab_results=tab_results)

     expected_outputs = {"tab-1": TabOutput("Tab 1", "tab-1.arrow")}
     self.assertEqual(result.tab_outputs, expected_outputs)
Esempio n. 2
0
 def test_execute_empty_tab(self):
     # Executing a tab flow with no steps should give a result with no
     # columns, and the output file should equal make_table() with no
     # arguments.
     workflow = Workflow.create_and_init()
     db_tab = workflow.tabs.first()
     empty_flow = TabFlow(Tab(db_tab.slug, db_tab.name), [])
     with self._execute(workflow, empty_flow, {}) as (result, path):
         expected_result = StepResult(path, [])
         self.assertEqual(result, expected_result)
         written_table = load_trusted_arrow_file(path)
         self.assertEqual(written_table, make_table())
Esempio n. 3
0
 def test_clean_tab_unreachable(self):
     # Selecting a tab whose StepResult has an empty column list must raise
     # TabOutputUnreachableError.
     unreachable_tab = Tab("tab-error", "Buggy Tab")
     empty_result = StepResult(Path("tab-error.arrow"), [])
     with self.assertRaises(TabOutputUnreachableError):
         self._call_clean_value(
             ParamSchema.Tab(),
             "tab-error",
             tab_results={unreachable_tab: empty_result},
         )
Esempio n. 4
0
 def test_clean_tabs_tab_unreachable(self):
     # A multitab value naming a tab whose StepResult has an empty column
     # list must raise TabOutputUnreachableError.
     tab_results = {
         Tab("tab-1", "Tab 1"): StepResult(Path("tab-1.arrow"), []),
     }
     with self.assertRaises(TabOutputUnreachableError):
         self._call_clean_value(
             ParamSchema.Multitab(), ["tab-1"], tab_results=tab_results
         )
Esempio n. 5
0
    def test_resume_backtrack_on_corrupt_cache_error(self):
        # step1 has a fresh cached result whose stored bytes are corrupt;
        # step2 has no cache. Execution is expected to log at ERROR level
        # and render both steps.
        module_zipfile = create_module_zipfile(
            "mod", spec_kwargs={"loads_data": True}
        )
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        # step1: cached result is fresh -- but CORRUPT
        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        cached_table = make_table(make_column("A", [1]))
        write_to_rendercache(
            workflow, step1, workflow.last_delta_id, cached_table
        )
        step1.refresh_from_db()
        # Overwrite the cached file with garbage -- will lead to
        # CorruptCacheError
        parquet_key = rendercache.io.crr_parquet_key(step1.cached_render_result)
        s3.put_bytes(rendercache.io.BUCKET, parquet_key, b"CORRUPT")

        # step2: no cached result -- must re-render
        step2 = tab.steps.create(order=1, slug="step-2", module_id_name="mod")

        steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(Tab(tab.slug, tab.name), steps)

        new_table = make_table(make_column("B", ["b"]))
        fake_render = mock_render(new_table)

        # Keep the post-execution assertions inside the patch so that
        # Kernel.render is still the mock when it is inspected.
        with patch.object(Kernel, "render", side_effect=fake_render):
            with self._execute(
                workflow, tab_flow, {}, expect_log_level=logging.ERROR
            ) as (result, path):
                expected_columns = [Column("B", ColumnType.Text())]
                self.assertEqual(result, StepResult(path, expected_columns))

            # called with step1, then step2
            self.assertEqual(Kernel.render.call_count, 2)

            # Output is to the correct file
            output_filename = Kernel.render.call_args[1]["output_filename"]
            self.assertRegex(output_filename, r"execute-tab-output.*\.arrow")
Esempio n. 6
0
    def test_execute_partial_cache_hit(self):
        # step1's cache is written at the current delta and step2's at an
        # older one, so exactly one render call (for step2) is expected.
        module_zipfile = create_module_zipfile(
            "mod", spec_kwargs={"loads_data": True}
        )
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        # step1: cached result is fresh. Should not render.
        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        fresh_table = make_table(make_column("A", ["a"]))
        write_to_rendercache(
            workflow, step1, workflow.last_delta_id, fresh_table
        )

        # step2: cached result is stale, so must be re-rendered
        step2 = tab.steps.create(
            order=1,
            slug="step-2",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        stale_delta_id = workflow.last_delta_id - 1
        stale_table = make_table(make_column("B", ["b"]))
        write_to_rendercache(workflow, step2, stale_delta_id, stale_table)

        steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(Tab(tab.slug, tab.name), steps)

        new_table = make_table(make_column("C", ["c"]))
        fake_render = mock_render(new_table)

        # Keep the post-execution assertions inside the patch so that
        # Kernel.render is still the mock when it is inspected.
        with patch.object(Kernel, "render", side_effect=fake_render):
            with self._execute(workflow, tab_flow, {}) as (result, path):
                expected_columns = [Column("C", ColumnType.Text())]
                self.assertEqual(result, StepResult(path, expected_columns))
                written_table = load_trusted_arrow_file(path)
                assert_arrow_table_equals(written_table, new_table)

            Kernel.render.assert_called_once()  # step2, not step1

            # Output is to the correct file
            output_filename = Kernel.render.call_args[1]["output_filename"]
            self.assertRegex(output_filename, r"execute-tab-output.*\.arrow")
Esempio n. 7
0
 def test_clean_tabs_happy_path(self):
     # A multitab param selecting two available tabs should keep both slugs
     # and produce one TabOutput per selected tab.
     tab_results = {
         Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [NUMBER("B")]),
         Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [NUMBER("C")]),
     }
     result = self._call_prep_params(
         ParamSchema.Dict({"x": ParamSchema.Multitab()}),
         {"x": ["tab-2", "tab-3"]},
         tab_results=tab_results,
     )

     expected = PrepParamsResult(
         {"x": ["tab-2", "tab-3"]},
         {
             "tab-2": TabOutput("Tab 2", "tab-2.arrow"),
             "tab-3": TabOutput("Tab 3", "tab-3.arrow"),
         },
         uploaded_files={},
     )
     self.assertEqual(result, expected)
Esempio n. 8
0
 def test_clean_tabs_preserve_ordering(self):
     # "x" gives wrongly-ordered tabs; renderprep should reorder them.
     # Here tab_results lists tab-3 before tab-2 while "x" lists tab-2
     # first, and the expected params follow the tab_results order.
     schema = ParamSchema.Dict({"x": ParamSchema.Multitab()})
     tab_results = {
         Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [NUMBER("C")]),
         Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [NUMBER("B")]),
     }

     result = self._call_prep_params(
         schema, {"x": ["tab-2", "tab-3"]}, tab_results=tab_results
     )

     expected = PrepParamsResult(
         {"x": ["tab-3", "tab-2"]},
         {
             "tab-3": TabOutput("Tab 3", "tab-3.arrow"),
             "tab-2": TabOutput("Tab 2", "tab-2.arrow"),
         },
         uploaded_files={},
     )
     self.assertEqual(result, expected)
Esempio n. 9
0
 def test_clean_tab_happy_path(self):
     # A tab param selecting an available tab should keep its slug and
     # produce that tab's TabOutput.
     tab_results = {
         Tab("tab-1", "Tab 1"): StepResult(Path("tab-1.arrow"), [TEXT("A")]),
     }
     result = self._call_prep_params(
         ParamSchema.Dict({"x": ParamSchema.Tab()}),
         {"x": "tab-1"},
         tab_results=tab_results,
     )

     expected = PrepParamsResult(
         {"x": "tab-1"},
         tab_outputs={"tab-1": TabOutput("Tab 1", "tab-1.arrow")},
         uploaded_files={},
     )
     self.assertEqual(result, expected)
Esempio n. 10
0
 def test_clean_multicolumn_from_other_tab(self):
     # "columns" is tied to the "tab" param via tab_parameter="tab". The
     # input table has "A-from-tab-1" and the selected tab has
     # "A-from-tab-2"; only the latter should survive cleaning.
     schema = ParamSchema.Dict(
         {
             "tab": ParamSchema.Tab(),
             "columns": ParamSchema.Multicolumn(tab_parameter="tab"),
         }
     )
     params = {"tab": "tab-2", "columns": ["A-from-tab-1", "A-from-tab-2"]}
     other_tab_result = StepResult(
         Path("tab-2.arrow"), [NUMBER("A-from-tab-2")]
     )

     result = self._call_prep_params(
         schema,
         params,
         input_table_columns=[NUMBER("A-from-tab-1")],
         tab_results={Tab("tab-2", "Tab 2"): other_tab_result},
     )

     cleaned_columns = result.params["columns"]
     self.assertEqual(cleaned_columns, ["A-from-tab-2"])
Esempio n. 11
0
    def test_execute_cache_hit(self):
        # Both steps have cached results written at the current delta, so
        # Kernel.render must not be called and the tab output should match
        # step2's cached table.
        cached_table1 = make_table(make_column("A", [1]))
        cached_table2 = make_table(make_column("B", [2], format="${:,}"))
        module_zipfile = create_module_zipfile(
            "mod", spec_kwargs={"loads_data": True}
        )
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        write_to_rendercache(
            workflow, step1, workflow.last_delta_id, cached_table1
        )

        step2 = tab.steps.create(
            order=1,
            slug="step-2",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        write_to_rendercache(
            workflow, step2, workflow.last_delta_id, cached_table2
        )

        steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(Tab(tab.slug, tab.name), steps)

        # If render were (wrongly) called, it would produce this table.
        unwanted_table = make_table(make_column("No", ["bad"]))
        fake_render = mock_render(unwanted_table)

        # Keep the final assertion inside the patch so that Kernel.render
        # is still the mock when it is inspected.
        with patch.object(Kernel, "render", side_effect=fake_render):
            with self._execute(workflow, tab_flow, {}) as (result, path):
                expected_columns = [
                    Column("B", ColumnType.Number(format="${:,}")),
                ]
                self.assertEqual(result, StepResult(path, expected_columns))
                written_table = load_trusted_arrow_file(path)
                assert_arrow_table_equals(written_table, cached_table2)

            Kernel.render.assert_not_called()
Esempio n. 12
0
    def test_execute_cache_miss(self):
        # Neither step has a cached result written, so both steps are
        # expected to render and the tab output should be the rendered
        # table.
        module_zipfile = create_module_zipfile(
            "mod", spec_kwargs={"loads_data": True}
        )
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        step1 = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        step2 = tab.steps.create(
            order=1,
            slug="step-2",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        steps = [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ]
        tab_flow = TabFlow(Tab(tab.slug, tab.name), steps)

        table = make_table(make_column("A", ["a"]))
        fake_render = mock_render(table)

        # Keep the post-execution assertions inside the patch so that
        # Kernel.render is still the mock when it is inspected.
        with patch.object(Kernel, "render", side_effect=fake_render):
            with self._execute(workflow, tab_flow, {}) as (result, path):
                expected_columns = [Column("A", ColumnType.Text())]
                self.assertEqual(result, StepResult(path, expected_columns))
                written_table = load_trusted_arrow_file(path)
                assert_arrow_table_equals(written_table, table)

            # Two calls: both step1 and step2 were rendered.
            self.assertEqual(Kernel.render.call_count, 2)

            # Output is to the correct file
            output_filename = Kernel.render.call_args[1]["output_filename"]
            self.assertRegex(output_filename, r"execute-tab-output.*\.arrow")