def test_dropcolumns_complex(self):
    """Drop by name and regexp with a keep-list; expect only col2 and py to remain.

    The same column list is expected on both ``x_train`` and ``x_test``.
    """
    rows = [[1, 0, 0, 3], [0, 2, 3, 4], [0, 3, 4, 4], [1, 2, 3, 6]]
    frame = pd.DataFrame(rows, columns=["col1", "col2", "col3", "py"])

    clean = Clean(
        x_train=frame,
        x_test=None,
        split=True,
        target_field="",
        report_name="test",
        test_split_percentage=0.5,
    )
    clean.drop(
        "col1",
        keep=["col2"],
        regexp=r"col*",
        reason="Columns were unimportant.",
    )

    expected = ["col2", "py"]
    train_matches = list(clean.x_train.columns) == expected
    # `and` keeps the x_test lookup lazy: it is only evaluated when the
    # train columns already match.
    validate = train_matches and list(clean.x_test.columns) == expected
    self.assertTrue(validate)
def test_dropcolumns_keep(self):
    """Drop with only a keep-list; expect just col2 to remain on both splits.

    Also checks that ``drop`` returns a ``Clean`` instance.
    """
    int_missing_data = [[1, 0, 0], [0, 2, 3], [0, 3, 4], [1, 2, 3]]
    columns = ["col1", "col2", "col3"]
    data = pd.DataFrame(int_missing_data, columns=columns)

    clean = Clean(x_train=data, report_name="test", test_split_percentage=0.5)
    clean_inst = clean.drop(keep=["col2"], reason="Columns were unimportant.")

    self.assertIsInstance(clean_inst, Clean)
    # Compare plain lists, not `Index == list`: the element-wise comparison
    # raises ValueError on a length mismatch (and its result is ambiguous in
    # a boolean context), which would turn a failure into an error.
    self.assertListEqual(list(clean_inst.x_train.columns), ["col2"])
    self.assertListEqual(list(clean_inst.x_test.columns), ["col2"])