Esempio n. 1
0
    def test_cleanutil_removecolumns(self):
        """Only ``col1`` should remain after ``remove_columns(0.5)``.

        In the fixture, ``col1`` has one missing value out of three while
        ``col2`` and ``col3`` each have two missing values out of three.
        """
        frame = pd.DataFrame(
            [[1, 0, 0], [0, None, None], [None, None, None]],
            columns=["col1", "col2", "col3"],
        )

        cleaner = Clean(x_train=frame, test_split_percentage=0.5, split=False)
        cleaner.remove_columns(0.5)

        # Compare the surviving column labels as a plain list.
        self.assertListEqual(cleaner.x_train.columns.tolist(), ["col1"])
Esempio n. 2
0
    def test_report_cleaning_technique(self):
        """Check that the report file names the columns hit by ``remove_columns``.

        In the fixture, ``col2`` and ``col3`` each have two missing values out
        of three, so both names are expected to appear in the report written
        for ``remove_columns(0.5)``.

        The report file is deleted in a ``finally`` block so that a failure in
        ``remove_columns`` or while reading the report does not leave a stale
        file behind for later test runs.
        """

        int_missing_data = np.array([(1, 0, 0), (0, None, None),
                                     (None, None, None)])
        columns = ["col1", "col2", "col3"]
        data = pd.DataFrame(int_missing_data, columns=columns)

        clean = Clean(x_train=data,
                      test_split_percentage=0.5,
                      split=False,
                      report_name="test")
        # NOTE(review): assumes the report path is fixed once Clean is
        # constructed and does not change during remove_columns -- confirm.
        report_path = clean._data_properties.report.filename

        try:
            clean.remove_columns(0.5)

            with open(report_path) as f:
                content = f.read()
        finally:
            # Guard with exists() so a missing report surfaces as the original
            # error from open() rather than a second error from the cleanup.
            if os.path.exists(report_path):
                os.remove(report_path)

        # Separate assertions so a failure names the column that is missing.
        self.assertIn("col2", content)
        self.assertIn("col3", content)