Ejemplo n.º 1
0
    def test_dropcolumns_complex(self):
        """Check ``drop`` when a column name, ``keep`` and ``regexp`` are mixed.

        A four-column frame is handed to ``Clean`` with ``split=True``; the
        test then calls ``drop`` with ``col1``, a ``keep`` list containing
        ``col2`` and the regexp ``col*``.  Only ``col2`` and ``py`` are
        expected to remain, in that order, in both ``x_train`` and ``x_test``.
        """

        int_missing_data = [[1, 0, 0, 3], [0, 2, 3, 4], [0, 3, 4, 4],
                            [1, 2, 3, 6]]
        columns = ["col1", "col2", "col3", "py"]
        data = pd.DataFrame(int_missing_data, columns=columns)

        clean = Clean(
            x_train=data,
            x_test=None,
            split=True,
            target_field="",
            report_name="test",
            test_split_percentage=0.5,
        )
        # NOTE(review): presumably ``keep`` wins over the regexp match for
        # ``col2`` -- confirm against ``Clean.drop``.
        clean.drop("col1",
                   keep=["col2"],
                   regexp=r"col*",
                   reason="Columns were unimportant.")

        expected_columns = ["col2", "py"]

        # Assert each frame separately so a failure reports which frame is
        # wrong and shows the differing column lists, instead of a bare
        # "False is not true".
        self.assertListEqual(list(clean.x_train.columns), expected_columns)
        self.assertListEqual(list(clean.x_test.columns), expected_columns)
Ejemplo n.º 2
0
    def test_dropcolumns_keep(self):
        """Check ``drop`` when only a ``keep`` list is supplied.

        A three-column frame is handed to ``Clean`` and ``drop`` is called
        with ``keep=["col2"]``.  The value returned by ``drop`` is expected to
        be a ``Clean`` instance whose ``x_train`` and ``x_test`` frames contain
        only ``col2``.
        """

        int_missing_data = [[1, 0, 0], [0, 2, 3], [0, 3, 4], [1, 2, 3]]
        columns = ["col1", "col2", "col3"]
        data = pd.DataFrame(int_missing_data, columns=columns)

        clean = Clean(x_train=data,
                      report_name="test",
                      test_split_percentage=0.5)
        clean_inst = clean.drop(keep=["col2"],
                                reason="Columns were unimportant.")

        # Check the return type first so a wrong return value fails with a
        # clear message rather than an AttributeError on ``x_train``.
        self.assertIsInstance(clean_inst, Clean)

        # Convert the column Index to a list before comparing: comparing the
        # Index to a list directly is element-wise, so it does not yield a
        # single bool and can raise instead of failing the assertion cleanly.
        expected_columns = ["col2"]
        self.assertListEqual(list(clean_inst.x_train.columns),
                             expected_columns)
        self.assertListEqual(list(clean_inst.x_test.columns),
                             expected_columns)