Esempio n. 1
0
 def test_refine_rename_nan_category(self):
     # Inspect the result by hand rather than going through
     # _test_refine_spec_apply(), because pandas botches the comparison:
     #
     # >>> self._test_refine_spec_apply(
     #     pd.DataFrame({'A': [np.nan]}, dtype='category'),
     #     'A', RefineSpec({'b': 'c'}),
     #     pd.DataFrame({'A': [np.nan]}, dtype='category')
     # )
     # Attribute "dtype" are different
     # [left]:  CategoricalDtype(categories=[], ordered=False)
     # [right]: CategoricalDtype(categories=[], ordered=False)
     #
     # NOTE(review): the table built below has an empty column, not a NaN
     # value as the test name suggests -- confirm that is intended.
     rename_spec = RefineSpec({"b": "c"})
     empty_table = pd.DataFrame({"A": []}, dtype="category")
     refined = rename_spec.apply(empty_table, "A")

     # Neither rows nor categories should come out the other end.
     self.assertEqual(0, len(refined))
     self.assertEqual(0, len(refined["A"].cat.categories))
Esempio n. 2
0
 def test_refine_ignore_nan(self):
     # Renaming "b" to "a" must leave the NaN entry in place.
     before = pd.DataFrame({"A": ["a", "b", np.nan]}, dtype="category")
     rename_b_to_a = RefineSpec({"b": "a"})
     after = pd.DataFrame({"A": ["a", "a", np.nan]}, dtype="category")
     self._test_refine_spec_apply(before, "A", rename_b_to_a, after)
Esempio n. 3
0
 def test_refine_rename_category_to_existing(self):
     # The rename target "a" is already present in the column, so both
     # rows are expected to end up as "a".
     before = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
     rename_b_to_a = RefineSpec({"b": "a"})
     after = pd.DataFrame({"A": ["a", "a"]}, dtype="category")
     self._test_refine_spec_apply(before, "A", rename_b_to_a, after)
Esempio n. 4
0
 def test_refine_spurious_rename(self):
     # The spec renames "b", which never occurs in the column; the
     # expected output equals the input.
     before = pd.DataFrame({"A": ["a"]}, dtype="category")
     unrelated_rename = RefineSpec({"b": "c"})
     after = pd.DataFrame({"A": ["a"]}, dtype="category")
     self._test_refine_spec_apply(before, "A", unrelated_rename, after)
Esempio n. 5
0
 def test_refine_rename_empty_category(self):
     # An empty categorical column goes in; an empty categorical column
     # is expected back.
     before = pd.DataFrame({"A": []}, dtype="category")
     rename_b_to_c = RefineSpec({"b": "c"})
     after = pd.DataFrame({"A": []}, dtype="category")
     self._test_refine_spec_apply(before, "A", rename_b_to_c, after)
Esempio n. 6
0
 def test_parse_v2_no_blacklist_after_rename(self):
     # The input carries both "renames" and a "blacklist"; the expected
     # spec is built from the renames only.
     v2_params = {
         "renames": {"a": "b"},
         "blacklist": ["c"],
     }
     renames_only = RefineSpec({"a": "b"})
     self._test_parse_v2("A", v2_params, renames_only)
Esempio n. 7
0
 def test_refine_rename_to_new(self):
     # The input column is built without a categorical dtype; the expected
     # output is categorical, with "b" replaced by the new value "c".
     before = pd.DataFrame({"A": ["a", "b"]})
     rename_b_to_c = RefineSpec({"b": "c"})
     after = pd.DataFrame({"A": ["a", "c"]}, dtype="category")
     self._test_refine_spec_apply(before, "A", rename_b_to_c, after)
Esempio n. 8
0
 def test_parse_v0_cascade_rename(self):
     # Two chained v0 "change" edits on column A: x -> y, then y -> z.
     # The expected spec sends both x and y to z.
     rename_x_to_y = {
         "type": "change",
         "column": "A",
         "content": {"fromVal": "x", "toVal": "y"},
     }
     rename_y_to_z = {
         "type": "change",
         "column": "A",
         "content": {"fromVal": "y", "toVal": "z"},
     }
     collapsed = RefineSpec({"x": "z", "y": "z"})
     self._test_parse_v0("A", [rename_x_to_y, rename_y_to_z], collapsed)
Esempio n. 9
0
 def test_parse_v0_ignore_wrong_column(self):
     # The only edit targets column B while column A is being parsed; the
     # expected result is a RefineSpec built with no arguments.
     edit_on_other_column = {
         "type": "select",
         "column": "B",
         "content": {"value": "foo"},
     }
     self._test_parse_v0("A", [edit_on_other_column], RefineSpec())
Esempio n. 10
0
 def test_parse_v0_filter(self):
     # A lone v0 "select" edit on column A; the expected result is a
     # RefineSpec built with no arguments.
     select_foo = {
         "type": "select",
         "column": "A",
         "content": {"value": "foo"},
     }
     self._test_parse_v0("A", [select_foo], RefineSpec())
Esempio n. 11
0
    def _test_refine_spec_apply(
        self,
        in_table: pd.DataFrame,
        column: str,
        spec: RefineSpec,
        expected_out: pd.DataFrame = pd.DataFrame(),
        expected_error: str = "",
    ) -> None:
        """Apply `spec` to `column` of `in_table` and check what comes back.

        Three shapes of result are handled:

        * no `expected_error`: the result is compared, as a frame, against
          `expected_out`;
        * `expected_error` with an empty `expected_out`: the result itself
          is compared against the error string;
        * `expected_error` with a non-empty `expected_out`: the result is
          unpacked as a `(table, error)` pair and both parts are compared.
        """
        # NOTE(review): the DataFrame default is created once, at definition
        # time. It is only ever read here, never mutated.
        wants_table = not expected_out.empty
        wants_error = bool(expected_error)
        actual = spec.apply(in_table, column)

        if not wants_error:
            assert_frame_equal(actual, expected_out)
            return

        if not wants_table:
            self.assertEqual(actual, expected_error)
            return

        actual_table, actual_error = actual
        self.assertEqual(actual_error, expected_error)
        assert_frame_equal(actual_table, expected_out)
Esempio n. 12
0
    def test_parse_v0_no_blacklist_after_rename(self):
        # Historical background: the old logic applied edits one at a time,
        # mutating the dataframe on every step and maintaining a separate
        # "selected" column. On a 'change' edit it looked up the 'selected'
        # flag of the destination value.
        #
        # That was a stateful, confusing way to do something very simple:
        # rename first, then filter.
        #
        # Worse, the outcome depended on the values present in the table.
        # That is no longer the case: the user edits a set of instructions,
        # not table values directly. In this example 'y' could previously
        # have ended up either selected or deselected; after the upgrade it
        # is deselected. This is not strictly compatible, but there is a
        # limit to how much effort the old format deserves.

        # UPDATE 1/29/2019
        # The new Refine module does not filter, so it has no use for the
        # blacklist. The blacklist is omitted from RefineSpec and only the
        # rename should be included.
        select_x = {
            "type": "select",
            "column": "A",
            "content": {"value": "x"},
        }
        rename_x_to_y = {
            "type": "change",
            "column": "A",
            "content": {"fromVal": "x", "toVal": "y"},
        }
        rename_only = RefineSpec({"x": "y"})  # opinionated
        self._test_parse_v0("A", [select_x, rename_x_to_y], rename_only)
Esempio n. 13
0
 def test_parse_v3_only_rename(self):
     # The params hold nothing but a "renames" mapping, and the expected
     # spec mirrors it.
     # NOTE(review): the name says v3 but the check goes through the
     # _test_parse_v2 helper -- confirm that is intentional.
     rename_only_params = {"renames": {"a": "b"}}
     expected_spec = RefineSpec({"a": "b"})
     self._test_parse_v2("A", rename_only_params, expected_spec)