Ejemplo n.º 1
0
    def test_cleanutil_removecolumns(self):
        """Check that ``drop_column_missing_threshold(0.5)`` leaves only ``col1``.

        The three-row fixture has one missing value in ``col1`` and two
        missing values in each of ``col2`` and ``col3``; after the call the
        training frame is expected to contain ``col1`` alone.
        """
        # Local import: only needed for artifact cleanup below.
        import os

        int_missing_data = [[1, 0, 0], [0, None, None], [None, None, None]]
        columns = ["col1", "col2", "col3"]
        data = pd.DataFrame(int_missing_data, columns=columns)

        clean = Data(x_train=data,
                     test_split_percentage=0.5,
                     split=False,
                     report_name="test")
        try:
            clean.drop_column_missing_threshold(0.5)
            validate = clean.x_train.columns.tolist()
        finally:
            # The object was created with a report name, so a report file may
            # have been written; remove it so the test leaves nothing behind.
            # NOTE(review): assumes `clean.report.filename` is the only
            # on-disk artifact -- confirm against the Data/report code.
            report_path = clean.report.filename
            if os.path.exists(report_path):
                os.remove(report_path)

        self.assertListEqual(validate, ["col1"])
Ejemplo n.º 2
0
    def test_report_cleaning_technique(self):
        """Check that the report file mentions the columns affected by the drop.

        After ``drop_column_missing_threshold(0.5)`` runs on the fixture, the
        file at ``clean.report.filename`` is read back and must contain both
        ``"col2"`` and ``"col3"``. The report file is deleted before the
        assertions run so a failing assertion does not leave it on disk.
        """
        int_missing_data = np.array([(1, 0, 0), (0, None, None),
                                     (None, None, None)])
        columns = ["col1", "col2", "col3"]
        data = pd.DataFrame(int_missing_data, columns=columns)

        clean = Data(x_train=data,
                     test_split_percentage=0.5,
                     split=False,
                     report_name="test")
        clean.drop_column_missing_threshold(0.5)

        with open(clean.report.filename) as f:
            content = f.read()

        # Clean up first: the content is already in memory.
        os.remove(clean.report.filename)

        # One assertion per column so a failure names the missing column and
        # shows the report text instead of a bare "False is not true".
        self.assertIn("col2", content)
        self.assertIn("col3", content)