コード例 #1
0
ファイル: test_renderprep.py プロジェクト: afcarl/cjworkbench
 def test_clean_normal_dict(self):
     """A Dict value whose fields need no cleaning comes back unchanged."""
     context = self._render_context()
     dict_dtype = ParamDType.Dict(
         {
             "str": ParamDType.String(),
             "int": ParamDType.Integer(),
         }
     )
     params = {"str": "foo", "int": 3}
     # Snapshot the input before the call so the comparison is against the
     # original contents (cleaning is expected to be a no-op here).
     unchanged = dict(params)
     cleaned = clean_value(dict_dtype, params, context)
     self.assertEqual(cleaned, unchanged)
コード例 #2
0
 def test_clean_normal_dict(self):
     """A Dict value whose fields need no cleaning comes back unchanged."""
     table_shape = TableMetadata(3, [Column("A", ColumnType.Number())])
     dict_dtype = ParamDType.Dict(
         {"str": ParamDType.String(), "int": ParamDType.Integer()}
     )
     params = {"str": "foo", "int": 3}
     # Snapshot the input before the call so the comparison is against the
     # original contents (cleaning is expected to be a no-op here).
     unchanged = dict(params)
     cleaned = clean_value(dict_dtype, params, table_shape)
     self.assertEqual(cleaned, unchanged)
コード例 #3
0
    def test_execute_partial_cache_hit(self, fake_load_module):
        """Only the step whose cached result is stale gets rendered again."""
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        # First step: its cached result matches the current delta, so it
        # should be reused rather than rendered.
        fresh_step = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        fresh_cached = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, fresh_step, workflow.last_delta_id, fresh_cached
        )

        # Second step: the result is cached against the previous delta, then
        # the step is bumped to the current delta -- making the cache stale.
        stale_step = tab.wf_modules.create(
            order=1,
            slug="step-2",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id - 1,
        )
        stale_cached = RenderResult(arrow_table({"B": [2]}))
        rendercache.cache_render_result(
            workflow, stale_step, workflow.last_delta_id - 1, stale_cached
        )
        stale_step.last_relevant_delta_id = workflow.last_delta_id
        stale_step.save(update_fields=["last_relevant_delta_id"])

        arrow_tab = tab.to_arrow()
        steps = [
            ExecuteStep(fresh_step, ParamDType.Dict({}), {}),
            ExecuteStep(stale_step, ParamDType.Dict({}), {}),
        ]
        tab_flow = TabFlow(arrow_tab, steps)

        expected = RenderResult(arrow_table({"B": [3]}))
        render_mock = fake_load_module.return_value.render
        render_mock.return_value = expected
        with self._execute(workflow, tab_flow, {}) as result:
            assert_render_result_equals(result, expected)

        # Exactly one render: the stale second step, not the fresh first one.
        render_mock.assert_called_once()
        # The render was directed at the tab-output file.
        output_filename = render_mock.call_args[1]["output_filename"]
        self.assertRegex(output_filename, r"execute-tab-output.*\.arrow")
コード例 #4
0
    def test_resume_backtrack_on_corrupt_cache_error(self, fake_load_module):
        """A fresh-but-corrupt cached result forces that step to render again."""
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        # First step: cached against the current delta (so it looks fresh)...
        corrupt_step = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        cached = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, corrupt_step, workflow.last_delta_id, cached
        )
        # ...but the stored bytes are overwritten with garbage, which will
        # lead to CorruptCacheError when the cache is read.
        bucket = rendercache.io.BUCKET
        parquet_key = rendercache.io.crr_parquet_key(corrupt_step.cached_render_result)
        minio.put_bytes(bucket, parquet_key, b"CORRUPT")

        # Second step: nothing cached, so it has to be rendered.
        uncached_step = tab.wf_modules.create(
            order=1, slug="step-2", module_id_name="mod"
        )

        arrow_tab = tab.to_arrow()
        steps = [
            ExecuteStep(corrupt_step, ParamDType.Dict({}), {}),
            ExecuteStep(uncached_step, ParamDType.Dict({}), {}),
        ]
        tab_flow = TabFlow(arrow_tab, steps)

        expected = RenderResult(arrow_table({"B": [2]}))
        render_mock = fake_load_module.return_value.render
        render_mock.return_value = expected
        with self._execute(
            workflow, tab_flow, {}, expect_log_level=logging.ERROR
        ) as result:
            assert_render_result_equals(result, expected)

        # Two renders: the first step, then the second step.
        self.assertEqual(render_mock.call_count, 2)
        # The last render was directed at the tab-output file.
        output_filename = render_mock.call_args[1]["output_filename"]
        self.assertRegex(output_filename, r"execute-tab-output.*\.arrow")
コード例 #5
0
ファイル: test_renderprep.py プロジェクト: afcarl/cjworkbench
    def test_list_prompting_error_concatenate_different_type_to_text(self):
        """Text-only list: both offending columns share one error, found-type None."""
        table = arrow_table({"A": [1], "B": [datetime.now()]})
        context = self._render_context(input_table=table)
        text_only = frozenset({"text"})
        list_dtype = ParamDType.List(
            inner_dtype=ParamDType.Column(column_types=text_only)
        )

        with self.assertRaises(PromptingError) as caught:
            clean_value(list_dtype, ["A", "B"], context)

        expected_errors = [
            PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
        ]
        self.assertEqual(caught.exception.errors, expected_errors)
コード例 #6
0
ファイル: test_renderprep.py プロジェクト: afcarl/cjworkbench
    def test_list_prompting_error_concatenate_same_type(self):
        """Two text columns given to a number-only list share one error."""
        table = arrow_table({"A": ["1"], "B": ["2"]})
        context = self._render_context(input_table=table)
        number_only = frozenset({"number"})
        list_dtype = ParamDType.List(
            inner_dtype=ParamDType.Column(column_types=number_only)
        )

        with self.assertRaises(PromptingError) as caught:
            clean_value(list_dtype, ["A", "B"], context)

        expected_errors = [
            PromptingError.WrongColumnType(["A", "B"], "text", frozenset({"number"}))
        ]
        self.assertEqual(caught.exception.errors, expected_errors)
コード例 #7
0
 def test_clean_multicolumn_sort_in_table_order(self):
     """Selected columns come back in the table's order, not the caller's."""
     table_columns = [
         Column("B", ColumnType.Number()),
         Column("A", ColumnType.Number()),
     ]
     input_shape = TableMetadata(3, table_columns)
     cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], input_shape)
     self.assertEqual(cleaned, ["B", "A"])
コード例 #8
0
    def test_clean_multichartseries_non_number_is_prompting_error(self):
        """Each non-number series column yields its own WrongColumnType error."""
        table = arrow_table(
            {
                "A": ["a"],
                "B": pa.array([datetime.now()], pa.timestamp("ns")),
            }
        )
        context = self._render_context(input_table=table)
        series = [
            {"column": "A", "color": "#aaaaaa"},
            {"column": "B", "color": "#cccccc"},
        ]

        with self.assertRaises(PromptingError) as caught:
            clean_value(ParamDType.Multichartseries(), series, context)

        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
        ]
        self.assertEqual(caught.exception.errors, expected_errors)
コード例 #9
0
 def test_clean_float_with_int_value(self):
     """An `int` given for a Float param is cleaned into a `float`."""
     # ParamDType.Float can have `int` values (because values come from
     # json.parse(), which only gives Numbers so can give "3" instead of
     # "3.0"). We want to pass that as `float` in the `params` dict.
     cleaned = clean_value(ParamDType.Float(), 3, None)
     self.assertEqual(cleaned, 3.0)
     self.assertIsInstance(cleaned, float)
コード例 #10
0
    def test_clean_file_happy_path(self):
        """A File param naming this step's upload is cleaned into a temp Path.

        The Path must hold the uploaded bytes, keep the uploaded name's
        suffixes, and be removed once the ExitStack passed to the render
        context is closed.
        """
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.wf_modules.create(
            module_id_name="uploadfile", order=0, slug="step-1"
        )
        # Named `file_uuid` (not `id`) so the builtin is not shadowed.
        file_uuid = str(uuid.uuid4())
        # Python f-strings interpolate with `{...}`; a JS-style `${...}` would
        # leave literal "$" characters in the key.
        key = f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}"
        minio.put_bytes(minio.UserFilesBucket, key, b"1234")
        UploadedFile.objects.create(
            wf_module=step,
            name="x.csv.gz",
            size=4,
            uuid=file_uuid,
            bucket=minio.UserFilesBucket,
            key=key,
        )
        with ExitStack() as inner_stack:
            context = self._render_context(
                wf_module_id=step.id, exit_stack=inner_stack
            )
            result: Path = clean_value(ParamDType.File(), file_uuid, context)
            self.assertIsInstance(result, Path)
            self.assertEqual(result.read_bytes(), b"1234")
            self.assertEqual(result.suffixes, [".csv", ".gz"])

        # Once the ExitStack has been closed, the file must be gone.
        self.assertFalse(result.exists())
コード例 #11
0
 def test_clean_column_happy_path(self):
     """A column whose type is accepted is returned by name, unchanged."""
     input_shape = TableMetadata(3, [Column("A", ColumnType.Number())])
     number_column = ParamDType.Column(column_types=frozenset({"number"}))
     cleaned = clean_value(number_column, "A", input_shape)
     self.assertEqual(cleaned, "A")
コード例 #12
0
 def test_clean_multicolumn_sort_in_table_order(self):
     """Selected columns come back in the table's order, not the caller's."""
     table = arrow_table({"B": [1], "A": [2]})
     context = self._render_context(input_table=table)
     cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], context)
     self.assertEqual(cleaned, ["B", "A"])
コード例 #13
0
 def test_clean_file_wrong_wf_module(self):
     """A File param naming another step's upload is cleaned to None.

     The UploadedFile row is attached to `step2`, but the render context is
     built for `step`; the result must be None and nothing may be left behind
     in `self.basedir`.
     """
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     step = tab.wf_modules.create(
         module_id_name="uploadfile", order=0, slug="step-1"
     )
     step2 = tab.wf_modules.create(
         module_id_name="uploadfile", order=1, slug="step-2"
     )
     # Named `file_uuid` (not `id`) so the builtin is not shadowed.
     file_uuid = str(uuid.uuid4())
     # Python f-strings interpolate with `{...}`; a JS-style `${...}` would
     # leave literal "$" characters in the key.
     key = f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}"
     minio.put_bytes(minio.UserFilesBucket, key, b"1234")
     UploadedFile.objects.create(
         wf_module=step2,
         name="x.csv.gz",
         size=4,
         uuid=file_uuid,
         bucket=minio.UserFilesBucket,
         key=key,
     )
     context = self._render_context(wf_module_id=step.id)
     result = clean_value(ParamDType.File(), file_uuid, context)
     self.assertIsNone(result)
     # Assert that if a temporary file was created to house the download, it
     # no longer exists.
     self.assertListEqual(list(self.basedir.iterdir()), [])
コード例 #14
0
 def test_clean_multicolumn_missing_is_removed(self):
     """Column names absent from the input table are dropped from the value."""
     table = arrow_table({"A": [1], "B": [1]})
     context = self._render_context(input_table=table)
     selected = ["A", "X", "B"]  # "X" is not a column of the table
     cleaned = clean_value(ParamDType.Multicolumn(), selected, context)
     self.assertEqual(cleaned, ["A", "B"])
コード例 #15
0
ファイル: test_renderprep.py プロジェクト: afcarl/cjworkbench
 def test_clean_multichartseries_missing_is_removed(self):
     """Series whose column is absent from the input table are dropped."""
     table = arrow_table({"A": [1], "B": [1]})
     context = self._render_context(input_table=table)
     series = [
         {"column": "A", "color": "#aaaaaa"},
         {"column": "C", "color": "#cccccc"},  # "C" is not in the table
     ]
     cleaned = clean_value(ParamDType.Multichartseries(), series, context)
     surviving = [{"column": "A", "color": "#aaaaaa"}]
     self.assertEqual(cleaned, surviving)
コード例 #16
0
 def test_clean_multicolumn_missing_is_removed(self):
     """Column names absent from the table metadata are dropped from the value."""
     table_columns = [
         Column("A", ColumnType.Number()),
         Column("B", ColumnType.Number()),
     ]
     input_shape = TableMetadata(3, table_columns)
     selected = ["A", "X", "B"]  # "X" is not a column of the table
     cleaned = clean_value(ParamDType.Multicolumn(), selected, input_shape)
     self.assertEqual(cleaned, ["A", "B"])
コード例 #17
0
 def test_map_parse(self):
     """Parsing a "map" spec builds a Map whose value dtype is the nested Dict."""
     spec = {
         "type": "map",
         "value_dtype": {
             "type": "dict",  # test nesting
             "properties": {"foo": {"type": "string"}},
         },
     }
     parsed = ParamDType.parse(spec)
     expected = ParamDType.Map(
         value_dtype=ParamDType.Dict(properties={"foo": ParamDType.String()})
     )
     # The two dtypes are compared through their repr() strings.
     self.assertEqual(repr(parsed), repr(expected))
コード例 #18
0
    def test_clean_tab_missing_tab_selected_gives_none(self):
        """
        If the user has selected a nonexistent tab, pretend tab is blank.

        JS sees nonexistent tab slugs. render() doesn't.
        """
        # No tab results at all, so "tab-XXX" cannot be resolved.
        context = self._render_context(tab_results={})
        result = clean_value(ParamDType.Tab(), "tab-XXX", context)
        # Identity check: the result must be None itself, not merely
        # something that compares equal to None.
        self.assertIsNone(result)
コード例 #19
0
 def test_clean_multicolumn_from_other_tab_that_does_not_exist(self):
     """Columns tied to a tab that is missing are cleaned to an empty list."""
     # The other tab would not exist if the user selected and then deleted
     # it.
     tab_dtype = ParamDType.Tab()
     columns_dtype = ParamDType.Multicolumn(tab_parameter="tab")
     schema = ParamDType.Dict({"tab": tab_dtype, "columns": columns_dtype})
     params = {"tab": "tab-missing", "columns": ["A-from-tab-1"]}
     this_tab_table = arrow_table({"A-from-tab-1": [1]})
     context = self._render_context(
         input_table=this_tab_table, tab_results={}, params=params
     )
     cleaned = clean_value(schema, params, context)
     # Only "columns" is under test here; cleaned["tab"] is not inspected.
     self.assertEqual(cleaned["columns"], [])
コード例 #20
0
ファイル: test_renderprep.py プロジェクト: afcarl/cjworkbench
 def test_clean_column_prompting_error_convert_to_number(self):
     """A text column given to a number-only Column param raises PromptingError."""
     table = arrow_table({"A": ["1"]})
     context = self._render_context(input_table=table)
     number_column = ParamDType.Column(column_types=frozenset({"number"}))

     with self.assertRaises(PromptingError) as caught:
         clean_value(number_column, "A", context)

     expected_errors = [
         PromptingError.WrongColumnType(["A"], "text", frozenset({"number"}))
     ]
     self.assertEqual(caught.exception.errors, expected_errors)
コード例 #21
0
    def test_clean_multicolumn_from_other_tab(self):
        """Columns tied to another tab are checked against that tab's output."""
        other_tab = Tab("tab-2", "Tab 2")
        other_tab_table = arrow_table({"A-from-tab-2": [1, 2]})

        tab_dtype = ParamDType.Tab()
        columns_dtype = ParamDType.Multicolumn(tab_parameter="tab")
        schema = ParamDType.Dict({"tab": tab_dtype, "columns": columns_dtype})
        params = {"tab": "tab-2", "columns": ["A-from-tab-1", "A-from-tab-2"]}
        this_tab_table = arrow_table({"A-from-tab-1": [1]})
        context = self._render_context(
            input_table=this_tab_table,
            tab_results={other_tab: RenderResult(other_tab_table)},
            params=params,
        )
        cleaned = clean_value(schema, params, context)
        # Only "columns" is under test here; cleaned["tab"] is not inspected.
        self.assertEqual(cleaned["columns"], ["A-from-tab-2"])
コード例 #22
0
ファイル: test_renderprep.py プロジェクト: afcarl/cjworkbench
 def test_clean_file_no_uploaded_file(self):
     """A File param with a UUID that has no UploadedFile is cleaned to None."""
     workflow = Workflow.create_and_init()
     first_tab = workflow.tabs.first()
     upload_step = first_tab.wf_modules.create(
         module_id_name="uploadfile", order=0, slug="step-1"
     )
     context = self._render_context(wf_module_id=upload_step.id)
     # A freshly generated UUID: this test creates no UploadedFile for it.
     unknown_uuid = str(uuid.uuid4())
     cleaned = clean_value(ParamDType.File(), unknown_uuid, context)
     self.assertIsNone(cleaned)
     # Assert that if a temporary file was created to house the download, it
     # no longer exists.
     leftovers = list(self.basedir.iterdir())
     self.assertListEqual(leftovers, [])
コード例 #23
0
ファイル: test_renderprep.py プロジェクト: afcarl/cjworkbench
    def test_dict_prompting_error(self):
        """Errors from each Dict property are collected into one PromptingError."""
        table = arrow_table({"A": ["a"], "B": ["b"]})
        context = self._render_context(input_table=table)
        properties = {
            "col1": ParamDType.Column(column_types=frozenset({"number"})),
            "col2": ParamDType.Column(column_types=frozenset({"datetime"})),
        }
        schema = ParamDType.Dict(properties)

        with self.assertRaises(PromptingError) as caught:
            clean_value(schema, {"col1": "A", "col2": "B"}, context)

        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(["B"], "text", frozenset({"datetime"})),
        ]
        self.assertEqual(caught.exception.errors, expected_errors)
コード例 #24
0
    def test_list_prompting_error_concatenate_different_type(self):
        """Offending columns of different found-types get separate errors."""
        table = arrow_table(
            {
                "A": ["1"],
                "B": pa.array([datetime.now()], pa.timestamp("ns")),
            }
        )
        context = self._render_context(input_table=table)
        number_column = ParamDType.Column(column_types=frozenset({"number"}))
        list_dtype = ParamDType.List(inner_dtype=number_column)

        with self.assertRaises(PromptingError) as caught:
            clean_value(list_dtype, ["A", "B"], context)

        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
        ]
        self.assertEqual(caught.exception.errors, expected_errors)
コード例 #25
0
    def test_execute_cache_miss(self, fake_load_module):
        """With nothing cached for either step, both steps are rendered."""
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        # Neither step gets a cached render result in this test.
        first_step = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        second_step = tab.wf_modules.create(
            order=1,
            slug="step-2",
            module_id_name="mod",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        arrow_tab = tab.to_arrow()
        steps = [
            ExecuteStep(first_step, ParamDType.Dict({}), {}),
            ExecuteStep(second_step, ParamDType.Dict({}), {}),
        ]
        tab_flow = TabFlow(arrow_tab, steps)

        expected = RenderResult(arrow_table({"B": [2]}))
        render_mock = fake_load_module.return_value.render
        render_mock.return_value = expected
        with self._execute(workflow, tab_flow, {}) as result:
            assert_render_result_equals(result, expected)

        # Two renders: one per step.
        self.assertEqual(render_mock.call_count, 2)
        # The last render was directed at the tab-output file.
        output_filename = render_mock.call_args[1]["output_filename"]
        self.assertRegex(output_filename, r"execute-tab-output.*\.arrow")
コード例 #26
0
    def test_clean_column_prompting_error_convert_to_number(self):
        """A text column given to a number-only Column param raises PromptingError."""
        input_shape = TableMetadata(3, [Column("A", ColumnType.Text())])
        number_column = ParamDType.Column(column_types=frozenset({"number"}))

        with self.assertRaises(PromptingError) as caught:
            clean_value(number_column, "A", input_shape)

        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"}))
        ]
        self.assertEqual(caught.exception.errors, expected_errors)
コード例 #27
0
    def test_dict_prompting_error(self):
        """Errors from each Dict property are collected into one PromptingError."""
        table_columns = [
            Column("A", ColumnType.Text()),
            Column("B", ColumnType.Text()),
        ]
        input_shape = TableMetadata(3, table_columns)
        properties = {
            "col1": ParamDType.Column(column_types=frozenset({"number"})),
            "col2": ParamDType.Column(column_types=frozenset({"datetime"})),
        }
        schema = ParamDType.Dict(properties)

        with self.assertRaises(PromptingError) as caught:
            clean_value(schema, {"col1": "A", "col2": "B"}, input_shape)

        expected_errors = [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(["B"], "text", frozenset({"datetime"})),
        ]
        self.assertEqual(caught.exception.errors, expected_errors)
コード例 #28
0
ファイル: test_renderprep.py プロジェクト: afcarl/cjworkbench
    def test_clean_multicolumn_prompting_error_convert_to_text(self):
        """Text-only multicolumn: offending columns share one error, found-type None."""
        # TODO make this _automatic_ instead of quick-fix?
        # ... but for now: prompt for a Quick Fix.
        table = arrow_table({"A": [1], "B": [datetime.now()], "C": ["x"]})
        context = self._render_context(input_table=table)

        with self.assertRaises(PromptingError) as caught:
            text_columns = ParamDType.Multicolumn(column_types=frozenset({"text"}))
            clean_value(text_columns, ["A", "B"], context)

        expected_errors = [
            PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
        ]
        self.assertEqual(caught.exception.errors, expected_errors)
コード例 #29
0
    def test_execute_cache_hit(self, fake_module):
        """When both steps have fresh cached results, no module is invoked."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()

        # Both steps are cached against the workflow's current delta.
        first_step = tab.wf_modules.create(
            order=0, slug="step-1", last_relevant_delta_id=workflow.last_delta_id
        )
        first_cached = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, first_step, workflow.last_delta_id, first_cached
        )
        second_step = tab.wf_modules.create(
            order=1, slug="step-2", last_relevant_delta_id=workflow.last_delta_id
        )
        second_cached = RenderResult(arrow_table({"B": [2]}))
        rendercache.cache_render_result(
            workflow, second_step, workflow.last_delta_id, second_cached
        )

        arrow_tab = tab.to_arrow()
        steps = [
            ExecuteStep(first_step, ParamDType.Dict({}), {}),
            ExecuteStep(second_step, ParamDType.Dict({}), {}),
        ]
        tab_flow = TabFlow(arrow_tab, steps)

        with self._execute(workflow, tab_flow, {}) as result:
            # The tab's output equals what was cached for the last step.
            expected = RenderResult(arrow_table({"B": [2]}), [])
            assert_render_result_equals(result, expected)

        fake_module.assert_not_called()
コード例 #30
0
    def test_clean_tabs_happy_path(self):
        """A Multitab value of slugs is cleaned into TabOutputs, in the given order."""
        second_tab = Tab("tab-2", "Tab 2")
        second_table = arrow_table({"B": [1]})
        third_tab = Tab("tab-3", "Tab 3")
        third_table = arrow_table({"C": [1]})

        tab_results = {
            second_tab: RenderResult(second_table),
            third_tab: RenderResult(third_table),
        }
        context = self._render_context(tab_results=tab_results)
        cleaned = clean_value(ParamDType.Multitab(), ["tab-2", "tab-3"], context)

        expected = [
            TabOutput(second_tab, second_table),
            TabOutput(third_tab, third_table),
        ]
        self.assertEqual(cleaned, expected)