def _test_refine_spec_apply(self, in_table: pd.DataFrame, column: str,
                            spec: RefineSpec,
                            expected_out: pd.DataFrame = pd.DataFrame(),
                            expected_error: str = '') -> None:
    """Render and assert the output is as expected.

    Applies ``spec`` to ``column`` of ``in_table``, wraps whatever
    ``spec.apply()`` returns with ``ProcessResult.coerce()``, and compares
    its error and dataframe against ``expected_error`` / ``expected_out``.

    Both the actual and the expected result go through
    ``sanitize_in_place()`` before the comparison.
    """
    result = ProcessResult.coerce(spec.apply(in_table, column))
    # Sanitize result+expected, so if sanitize changes these tests may
    # break (which is what we want).
    result.sanitize_in_place()

    # Wrap a copy: the default ``expected_out`` is one DataFrame object
    # shared by every call, and an in-place sanitize must not leak changes
    # into it (or into a frame the caller still holds).
    expected = ProcessResult(expected_out.copy(), expected_error)
    expected.sanitize_in_place()

    self.assertEqual(result.error, expected.error)
    assert_frame_equal(result.dataframe, expected.dataframe)
def test_refine_rename_nan_category(self):
    """A rename on an empty categorical column leaves it empty.

    The result is checked by hand (row count and category count) instead
    of going through ``_test_refine_spec_apply()``; see the comment below
    for why. Reading ``.cat`` doubles as the dtype check, since pandas only
    provides that accessor on categorical Series.
    """
    # Get explicit, because pandas botches the comparison:
    #
    # >>> self._test_refine_spec_apply(
    #     pd.DataFrame({'A': [np.nan]}, dtype='category'),
    #     'A', RefineSpec({'b': 'c'}),
    #     pd.DataFrame({'A': [np.nan]}, dtype='category')
    # )
    # Attribute "dtype" are different
    # [left]:  CategoricalDtype(categories=[], ordered=False)
    # [right]: CategoricalDtype(categories=[], ordered=False)
    spec = RefineSpec({'b': 'c'})
    result = spec.apply(pd.DataFrame({'A': []}, dtype='category'), 'A')

    self.assertEqual(0, len(result))
    self.assertEqual(0, len(result['A'].cat.categories))
def _test_refine_spec_apply(self, in_table: pd.DataFrame, column: str,
                            spec: RefineSpec,
                            expected_out: pd.DataFrame = pd.DataFrame(),
                            expected_error: str = '') -> None:
    """Render and assert the output is as expected.

    The raw return value of ``spec.apply(in_table, column)`` is checked
    according to which expectations were supplied:

    * non-empty ``expected_out`` and an ``expected_error``: the result is
      unpacked as a ``(table, error)`` pair and both parts are compared;
    * only ``expected_error``: the result itself must equal the message;
    * otherwise: the result must be a DataFrame equal to ``expected_out``.

    The default ``expected_out`` is a single DataFrame shared by all calls;
    it is only read here, never modified.
    """
    result = spec.apply(in_table, column)

    if not expected_out.empty and expected_error:
        # A DataFrame or a str is iterable too: unpacking one would bind
        # column labels / characters and fail later with a misleading
        # message, so reject those shapes up front.
        self.assertNotIsInstance(
            result, (pd.DataFrame, str),
            'expected a (table, error) pair from spec.apply()'
        )
        table, error = result
        self.assertEqual(error, expected_error)
        assert_frame_equal(table, expected_out)
    elif expected_error:
        # Comparing a DataFrame to a str with assertEqual raises ValueError
        # (ambiguous truth value) rather than a test failure.
        self.assertIsInstance(
            result, str,
            'expected an error message from spec.apply()'
        )
        self.assertEqual(result, expected_error)
    else:
        assert_frame_equal(result, expected_out)