def test_cleanutil_removecolumns(self):
    """Verify that ``remove_columns(0.5)`` leaves only ``col1`` in ``x_train``.

    The fixture has one column with a single missing value (``col1``) and
    two columns with two missing values each (``col2``, ``col3``).
    NOTE(review): 0.5 is presumably the missing-value fraction above which a
    column is dropped -- confirm against ``Clean.remove_columns``.
    """
    rows = [
        [1, 0, 0],
        [0, None, None],
        [None, None, None],
    ]
    frame = pd.DataFrame(rows, columns=["col1", "col2", "col3"])

    cleaner = Clean(x_train=frame, test_split_percentage=0.5, split=False)
    cleaner.remove_columns(0.5)

    remaining = list(cleaner.x_train.columns)
    self.assertListEqual(remaining, ["col1"])
def test_report_cleaning_technique(self):
    """Verify that the report file mentions the columns handled by ``remove_columns``.

    A ``Clean`` instance is created with ``report_name="test"``, then
    ``remove_columns(0.5)`` is applied. The report file is read back and must
    contain both ``"col2"`` and ``"col3"``.

    The report file is deleted in a ``finally`` block so a failed read does
    not leave it behind, and each column name is asserted separately so a
    failure identifies which name is missing.
    """
    int_missing_data = np.array([(1, 0, 0), (0, None, None), (None, None, None)])
    columns = ["col1", "col2", "col3"]
    data = pd.DataFrame(int_missing_data, columns=columns)

    clean = Clean(x_train=data, test_split_percentage=0.5, split=False, report_name="test")
    clean.remove_columns(0.5)

    # Resolve the path once so the read and the cleanup target the same file.
    report_path = clean._data_properties.report.filename
    try:
        with open(report_path) as f:
            content = f.read()
    finally:
        # Always remove the generated report, even if reading it failed.
        if os.path.exists(report_path):
            os.remove(report_path)

    self.assertIn("col2", content)
    self.assertIn("col3", content)