def test_dropcolumns_regex(self):
    """Check that ``drop(regexp=...)`` leaves only the ``message`` column.

    Builds a frame with three ``agent.*`` columns plus ``message``, wraps it
    in ``Data`` with ``split=False``, calls ``drop`` with the pattern
    ``agent*`` and expects ``x_train`` to end up with exactly ``["message"]``.
    """
    int_missing_data = [[1, 0, 0, 3], [0, 2, 3, 4], [0, 3, 4, 4], [1, 2, 3, 6]]
    columns = ["agent.hi", "agent.user_name", "agent.hello", "message"]
    data = pd.DataFrame(int_missing_data, columns=columns)

    clean = Data(
        x_train=data,
        x_test=None,
        split=False,
        target_field="",
        report_name="test",
        test_split_percentage=0.5,
    )
    clean.drop(regexp=r"agent*")

    # Compare plain lists rather than ``columns == [...]``: assuming
    # ``x_train`` is a pandas DataFrame, that comparison is element-wise,
    # yields an array instead of a bool, and raises ValueError on a length
    # mismatch instead of reporting a readable assertion failure.
    self.assertListEqual(list(clean.x_train.columns), ["message"])
def test_dropcolumns_complex(self):
    """Check ``drop`` with a positional column, ``keep`` and ``regexp`` combined.

    With ``split=True``, both ``x_train`` and ``x_test`` are expected to be
    left with the columns ``["col2", "py"]`` after the call.
    """
    header = ["col1", "col2", "col3", "py"]
    rows = [[1, 0, 0, 3], [0, 2, 3, 4], [0, 3, 4, 4], [1, 2, 3, 6]]
    frame = pd.DataFrame(rows, columns=header)

    options = {
        "x_train": frame,
        "x_test": None,
        "split": True,
        "target_field": "",
        "report_name": "test",
        "test_split_percentage": 0.5,
    }
    clean = Data(**options)

    clean.drop(
        "col1",
        keep=["col2"],
        regexp=r"col*",
        reason="Columns were unimportant.",
    )

    expected = ["col2", "py"]
    # Only look at the test split once the train split already matches.
    outcome = list(clean.x_train.columns) == expected
    if outcome:
        outcome = list(clean.x_test.columns) == expected
    self.assertTrue(outcome)
def test_dropcolumns_keep(self):
    """Check that ``drop(keep=[...])`` leaves only the kept column.

    Calls ``drop`` with ``keep=["col2"]`` and expects the returned object to
    be a ``Data`` instance whose ``x_train`` and ``x_test`` both contain
    exactly the column ``col2``.
    """
    int_missing_data = [[1, 0, 0], [0, 2, 3], [0, 3, 4], [1, 2, 3]]
    columns = ["col1", "col2", "col3"]
    data = pd.DataFrame(int_missing_data, columns=columns)

    clean = Data(x_train=data, report_name="test", test_split_percentage=0.5)
    clean_inst = clean.drop(keep=["col2"], reason="Columns were unimportant.")

    self.assertIsInstance(clean_inst, Data)
    # Compare plain lists rather than ``columns == [...]``: assuming the
    # splits are pandas DataFrames, that comparison is element-wise, and
    # chaining the resulting arrays with ``and`` raises ValueError as soon as
    # more than one column survives, hiding the real failure.
    self.assertListEqual(list(clean_inst.x_train.columns), ["col2"])
    self.assertListEqual(list(clean_inst.x_test.columns), ["col2"])