Beispiel #1
0
 def test_parse_v0_valueerror_bad_type(self):
     # The edit's type is 'selec', which is neither of the types used by
     # the other v0 tests ('select', 'change'); a ValueError is expected.
     bad_edit = {
         'type': 'selec',
         'column': 'A',
         'content': {'value': 'x'},
     }
     self.assertRaises(ValueError, RefineSpec.parse_v0, 'A', [bad_edit])
Beispiel #2
0
 def test_parse_v0_valueerror_bad_change_no_content_key(self):
     # The 'change' edit carries a 'contentx' key where the other v0 tests
     # carry 'content'; a ValueError is expected.
     edit_without_content = {
         'type': 'change',
         'column': 'A',
         'contentx': {'fromVal': 'x', 'toVal': 'y'},
     }
     self.assertRaises(
         ValueError, RefineSpec.parse_v0, 'A', [edit_without_content]
     )
Beispiel #3
0
 def test_refine_rename_swap(self):
     # Renaming 'a' -> 'b' and 'b' -> 'a' in one spec: the two values are
     # expected to trade places.
     in_table = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
     spec = RefineSpec({'a': 'b', 'b': 'a'})
     expected = pd.DataFrame({'A': ['b', 'a']}, dtype='category')
     self._test_refine_spec_apply(in_table, 'A', spec, expected)
Beispiel #4
0
 def test_parse_v2_no_blacklist_after_rename(self):
     # The params contain both a rename and a blacklist; the expected spec
     # is built from the rename alone.
     params = {'renames': {'a': 'b'}, 'blacklist': ['c']}
     expected = RefineSpec({'a': 'b'})
     self._test_parse_v2('A', params, expected)
Beispiel #5
0
 def test_refine_blacklist(self):
     # No renames, blacklist of ['a']: only the 'b' row is expected in the
     # output.
     in_table = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
     spec = RefineSpec({}, ['a'])
     expected = pd.DataFrame({'A': ['b']}, dtype='category')
     self._test_refine_spec_apply(in_table, 'A', spec, expected)
Beispiel #6
0
 def test_refine_cast_int_to_str(self):
     # Integer input combined with a string-keyed rename ('1' -> '2'):
     # both rows are expected to come out as the string '2', as category.
     in_table = pd.DataFrame({'A': [1, 2]})
     spec = RefineSpec({'1': '2'})
     expected = pd.DataFrame({'A': ['2', '2']}, dtype='category')
     self._test_refine_spec_apply(in_table, 'A', spec, expected)
Beispiel #7
0
 def test_parse_v1(self):
     # parse() is expected to expose the given renames and blacklist on
     # the resulting spec.
     params = {
         'renames': {'x': 'y', 'y': 'z'},
         'blacklist': ['z'],
     }
     spec = RefineSpec.parse('A', params)

     # Compare against fresh literals rather than the objects passed in.
     expected_renames = {'x': 'y', 'y': 'z'}
     self.assertEqual(spec.renames, expected_renames)
     expected_blacklist = ['z']
     self.assertEqual(spec.blacklist, expected_blacklist)
Beispiel #8
0
 def test_refine_rename_category_to_existing(self):
     # Renaming 'b' to the already-present 'a': both rows are expected to
     # hold 'a' afterwards.
     in_table = pd.DataFrame({"A": ["a", "b"]}, dtype="category")
     spec = RefineSpec({"b": "a"})
     expected = pd.DataFrame({"A": ["a", "a"]}, dtype="category")
     self._test_refine_spec_apply(in_table, "A", spec, expected)
Beispiel #9
0
 def test_refine_rename_empty_category(self):
     # An empty category column with a rename that matches nothing: the
     # expected output is the same empty category column.
     in_table = pd.DataFrame({"A": []}, dtype="category")
     spec = RefineSpec({"b": "c"})
     expected = pd.DataFrame({"A": []}, dtype="category")
     self._test_refine_spec_apply(in_table, "A", spec, expected)
Beispiel #10
0
 def test_refine_spurious_rename(self):
     # The rename targets 'b', which does not occur in the column; the
     # table is expected to come back unchanged.
     in_table = pd.DataFrame({"A": ["a"]}, dtype="category")
     spec = RefineSpec({"b": "c"})
     expected = pd.DataFrame({"A": ["a"]}, dtype="category")
     self._test_refine_spec_apply(in_table, "A", spec, expected)
Beispiel #11
0
 def test_refine_rename_to_new(self):
     # Input is built without an explicit dtype; after renaming 'b' to the
     # new value 'c' the expected output is a category column.
     in_table = pd.DataFrame({"A": ["a", "b"]})
     spec = RefineSpec({"b": "c"})
     expected = pd.DataFrame({"A": ["a", "c"]}, dtype="category")
     self._test_refine_spec_apply(in_table, "A", spec, expected)
Beispiel #12
0
 def test_parse_v2_no_blacklist_after_rename(self):
     # Params hold a rename plus a blacklist, yet the expected spec is
     # constructed from the rename only.
     renames = {"a": "b"}
     params = {"renames": renames, "blacklist": ["c"]}
     self._test_parse_v2("A", params, RefineSpec({"a": "b"}))
Beispiel #13
0
 def test_refine_cast_date_to_str(self):
     # A datetime64 input value with a rename keyed by the string
     # '2018-08-03 17:12:00': the expected output is 'x', as category.
     timestamp = np.datetime64('2018-08-03T17:12')
     in_table = pd.DataFrame({'A': [timestamp]})
     spec = RefineSpec({'2018-08-03 17:12:00': 'x'})
     expected = pd.DataFrame({'A': ['x']}, dtype='category')
     self._test_refine_spec_apply(in_table, 'A', spec, expected)
Beispiel #14
0
 def test_parse_v0_cascade_rename(self):
     # Two chained 'change' edits (x -> y, then y -> z) are expected to
     # collapse into a spec where both 'x' and 'y' map to 'z'.
     rename_steps = (("x", "y"), ("y", "z"))
     edits = [
         {
             "type": "change",
             "column": "A",
             "content": {"fromVal": from_val, "toVal": to_val},
         }
         for from_val, to_val in rename_steps
     ]
     expected = RefineSpec({"x": "z", "y": "z"})
     self._test_parse_v0("A", edits, expected)
Beispiel #15
0
 def test_refine_rename_category_to_new(self):
     # Category input; 'b' is renamed to 'c', a value not yet present in
     # the column.
     in_table = pd.DataFrame({'A': ['a', 'b']}, dtype='category')
     spec = RefineSpec({'b': 'c'})
     expected = pd.DataFrame({'A': ['a', 'c']}, dtype='category')
     self._test_refine_spec_apply(in_table, 'A', spec, expected)
Beispiel #16
0
 def test_refine_ignore_nan(self):
     # The NaN row is expected to stay NaN while 'b' is renamed to 'a'.
     in_table = pd.DataFrame({"A": ["a", "b", np.nan]}, dtype="category")
     spec = RefineSpec({"b": "a"})
     expected = pd.DataFrame({"A": ["a", "a", np.nan]}, dtype="category")
     self._test_refine_spec_apply(in_table, "A", spec, expected)
Beispiel #17
0
 def test_parse_v0_ignore_wrong_column(self):
     # The only edit targets column 'B' while parsing is done for 'A';
     # the expected result matches a default-constructed RefineSpec.
     edit_for_other_column = {
         'type': 'select',
         'column': 'B',
         'content': {'value': 'foo'},
     }
     expected = RefineSpec()
     self._test_parse_v0('A', [edit_for_other_column], expected)
Beispiel #18
0
 def test_parse_v0_filter(self):
     # A single 'select' edit on the parsed column; the expected result
     # matches a default-constructed RefineSpec.
     select_foo = {
         'type': 'select',
         'column': 'A',
         'content': {'value': 'foo'},
     }
     expected = RefineSpec()
     self._test_parse_v0('A', [select_foo], expected)
Beispiel #19
0
 def test_refine_rename_nan_category(self):
     # Assert on the result directly instead of going through
     # _test_refine_spec_apply(), because pandas botches the comparison:
     #
     # >>> self._test_refine_spec_apply(
     #     pd.DataFrame({'A': [np.nan]}, dtype='category'),
     #     'A', RefineSpec({'b': 'c'}),
     #     pd.DataFrame({'A': [np.nan]}, dtype='category')
     # )
     # Attribute "dtype" are different
     # [left]:  CategoricalDtype(categories=[], ordered=False)
     # [right]: CategoricalDtype(categories=[], ordered=False)
     #
     # NOTE(review): despite the test name and the example above, the
     # input built below is an empty column ([]), not [np.nan] -- confirm
     # that this is intended.
     spec = RefineSpec({'b': 'c'})
     result = spec.apply(pd.DataFrame({'A': []}, dtype='category'), 'A')

     # No rows and no categories are expected in the output column.
     self.assertEqual(0, len(result))
     self.assertEqual(0, len(result['A'].cat.categories))
Beispiel #20
0
 def _test_parse_v0(self, column: str, arr: List[Dict[str, Any]],
                    expected: RefineSpec) -> None:
     """
     Parse deprecated (v0) edits and compare the outcome with `expected`.

     Renames are compared for equality; blacklists are compared as sets,
     so their ordering and duplicate entries do not matter.
     """
     parsed = RefineSpec.parse_v0(column, arr)
     self.assertEqual(parsed.renames, expected.renames)

     actual_blacklist = set(parsed.blacklist)
     wanted_blacklist = set(expected.blacklist)
     self.assertEqual(actual_blacklist, wanted_blacklist)
Beispiel #21
0
 def test_parse_v0_rename(self):
     # A single 'change' edit (x -> y) is expected to parse into a spec
     # holding exactly that rename.
     rename_x_to_y = {
         'type': 'change',
         'column': 'A',
         'content': {'fromVal': 'x', 'toVal': 'y'},
     }
     expected = RefineSpec({'x': 'y'})
     self._test_parse_v0('A', [rename_x_to_y], expected)
Beispiel #22
0
 def test_parse_v0_filter_multiple(self):
     # The same 'select' edit appears twice; the expected spec has an
     # empty blacklist.
     edits = [
         {'type': 'select', 'column': 'A', 'content': {'value': 'foo'}}
         for _ in range(2)
     ]
     expected = RefineSpec(blacklist=[])
     self._test_parse_v0('A', edits, expected)
Beispiel #23
0
 def test_parse_v0_filter(self):
     # One 'select' edit on the column being parsed; the outcome is
     # compared against a default-constructed RefineSpec.
     select_edit = {
         "type": "select",
         "column": "A",
         "content": {"value": "foo"},
     }
     self._test_parse_v0("A", [select_edit], RefineSpec())
Beispiel #24
0
 def test_parse_v0_ignore_wrong_column(self):
     # The edit names column "B" but parsing is requested for "A"; the
     # outcome is compared against a default-constructed RefineSpec.
     select_on_b = {
         "type": "select",
         "column": "B",
         "content": {"value": "foo"},
     }
     self._test_parse_v0("A", [select_on_b], RefineSpec())
Beispiel #25
0
    def _test_refine_spec_apply(self, in_table: pd.DataFrame, column: str,
                                spec: RefineSpec,
                                expected_out: pd.DataFrame = None,
                                expected_error: str = '') -> None:
        """Apply `spec` to `in_table` and assert the outcome is as expected.

        Args:
            in_table: table handed to `spec.apply()`.
            column: column name handed to `spec.apply()`.
            spec: the RefineSpec under test.
            expected_out: expected output table. When omitted (None), a
                fresh empty DataFrame is used.
            expected_error: expected error message; '' by default.

        Raises:
            AssertionError: if the error or the dataframe differ from the
                expected values.
        """
        if expected_out is None:
            # Build the default per call. A `pd.DataFrame()` default in the
            # signature would be one object shared by every call, and it is
            # passed below to a ProcessResult that is sanitized in place.
            expected_out = pd.DataFrame()

        result = ProcessResult.coerce(spec.apply(in_table, column))
        # Sanitize result+expected, so if sanitize changes these tests may
        # break (which is what we want).
        result.sanitize_in_place()

        expected = ProcessResult(expected_out, expected_error)
        expected.sanitize_in_place()

        self.assertEqual(result.error, expected.error)
        assert_frame_equal(result.dataframe, expected.dataframe)
Beispiel #26
0
    def _test_refine_spec_apply(self,
                                in_table: pd.DataFrame,
                                column: str,
                                spec: RefineSpec,
                                expected_out: pd.DataFrame = pd.DataFrame(),
                                expected_error: str = '') -> None:
        """Apply `spec` to `in_table` and check what comes back.

        Three shapes of result are handled:

        * no expected error: the result is compared as a dataframe;
        * expected error with an empty `expected_out`: the result is
          compared with the error string;
        * expected error with a non-empty `expected_out`: the result is
          unpacked as `(table, error)` and both parts are compared.
        """
        actual = spec.apply(in_table, column)
        expects_table = not expected_out.empty

        if not expected_error:
            assert_frame_equal(actual, expected_out)
            return

        if not expects_table:
            self.assertEqual(actual, expected_error)
            return

        actual_table, actual_error = actual
        self.assertEqual(actual_error, expected_error)
        assert_frame_equal(actual_table, expected_out)
Beispiel #27
0
    def test_parse_v0_no_blacklist_after_rename(self):
        # Background: the old logic applied edits one at a time, modifying
        # the dataframe on each step and maintaining a separate "selected"
        # column. When the user added a 'change', it would look up the
        # 'selected' state of the destination value.
        #
        # That was a stateful, confusing route to something terribly
        # simple: rename first, then filter.
        #
        # Unfortunately the behavior depended on the values in the table.
        # That is no longer so: the user edits a set of instructions, not
        # table values directly. In this example 'y' used to end up either
        # selected or deselected; after the upgrade it is deselected. Not
        # strictly compatible, but there is a limit to how much work this
        # old format deserves.
        #
        # UPDATE 1/29/2019: the new Refine module does not filter and so
        # makes no use of the blacklist. The blacklist is now omitted from
        # RefineSpec, so only the rename should be included.
        select_x = {
            'type': 'select',
            'column': 'A',
            'content': {'value': 'x'},
        }
        rename_x_to_y = {
            'type': 'change',
            'column': 'A',
            'content': {'fromVal': 'x', 'toVal': 'y'},
        }
        expected = RefineSpec({'x': 'y'})  # opinionated
        self._test_parse_v0('A', [select_x, rename_x_to_y], expected)
Beispiel #28
0
 def test_parse_v0_valueerror_not_dict(self):
     # The edit list holds a plain string instead of a dict; a ValueError
     # is expected.
     not_a_dict = 'foo'
     self.assertRaises(ValueError, RefineSpec.parse_v0, 'A', [not_a_dict])
Beispiel #29
0
 def test_parse_v1_missing_renames(self):
     # The params carry 'enames' and no 'renames' key; a ValueError is
     # expected.
     params = {'enames': {}, 'blacklist': []}
     self.assertRaises(ValueError, RefineSpec.parse, 'A', params)
Beispiel #30
0
 def test_parse_v1_bad_blacklist(self):
     # 'blacklist' is the integer 3 here, whereas the other tests pass a
     # list; a ValueError is expected.
     params = {'renames': {}, 'blacklist': 3}
     self.assertRaises(ValueError, RefineSpec.parse, 'A', params)