예제 #1
0
 def test_multiple_args(self):
     """Expect ``execute`` to raise when ``drop`` and ``fill_missing`` are both set."""
     self.dataframe = pd.read_csv("datasets/encoding/testnew.csv")
     conflicting_params = {
         "df": self.dataframe,
         "drop": True,
         "fill_missing": "mean",
     }
     # NOTE(review): ``Exception`` accepts any failure; a narrower exception
     # type would make this check stricter -- confirm which one applies.
     with pytest.raises(Exception):
         NullValuesHandler().execute(conflicting_params)
예제 #2
0
def test_categorical_replace(test_input2):
    """Check that both frames hold eight "xyz" entries in ``Capitals``.

    The train and test frames are each loaded from the same CSV file, the
    handler is run on ``test_input2``, and the number of rows whose
    ``Capitals`` value equals "xyz" is counted in each resulting frame.
    """
    frame_keys = ("train_df", "test_df")
    for key in frame_keys:
        test_input2[key] = pd.read_csv("datasets/encoding/test2.csv")

    NullValuesHandler().execute(params=test_input2)

    for key in frame_keys:
        # Look the frame up again after execute: the assertion is about
        # whatever the params dict holds once the handler has run.
        frame = test_input2[key]
        xyz_capitals = frame[frame.Capitals == "xyz"]["Capitals"]
        assert len(xyz_capitals) == 8
예제 #3
0
def test_categorical_drops(test_input1):
    """Check that no "0" value remains and that no rows were added.

    Row counts of the train and test frames are recorded before the handler
    runs. Afterwards each frame's ``Other Capitals`` column must not contain
    the string "0", and each frame must have at most as many rows as before.
    """
    frame_keys = ("train_df", "test_df")
    rows_before = {key: test_input1[key].shape[0] for key in frame_keys}

    handler = NullValuesHandler()
    handler.execute(params=test_input1)

    for key in frame_keys:
        frame = test_input1[key]
        # ``"0" not in series`` tests the Series *index labels*, not its
        # values, so it could never fail here; ``isin`` checks the values.
        assert not frame["Other Capitals"].isin(["0"]).any()
        assert frame.shape[0] <= rows_before[key]
예제 #4
0
def test_auto():
    """Check that running the handler without options changes both row counts.

    Only ``train_df`` and ``test_df`` are passed; the test asserts that the
    number of rows in each frame differs after ``execute``.
    """
    source_frame = pd.read_csv("datasets/encoding/test2.csv")
    # Give the test frame its own copy: when both keys point at one object,
    # an in-place change to the train frame would also show up in the test
    # frame and hide whether the test frame was processed at all.
    params = {"train_df": source_frame, "test_df": source_frame.copy()}
    train_rows_before = params["train_df"].shape[0]
    test_rows_before = params["test_df"].shape[0]

    handler = NullValuesHandler()
    handler.execute(params=params)

    assert params["train_df"].shape[0] != train_rows_before
    assert params["test_df"].shape[0] != test_rows_before
예제 #5
0
def test_drop_col():
    """Check that ``drop`` with ``column_list`` removes "Distance" from both frames."""
    # ``dataframe1`` is defined outside this function. Each key gets its own
    # copy so train and test are not aliases of one object and so the shared
    # frame is not modified for other tests if the handler works in place.
    params = {
        "train_df": dataframe1.copy(),
        "test_df": dataframe1.copy(),
        "drop": True,
        "column_list": ["Distance"],
    }
    handler = NullValuesHandler()
    handler.execute(params=params)
    assert "Distance" not in params["train_df"].columns
    assert "Distance" not in params["test_df"].columns
예제 #6
0
    def test_output(self):
        """Check that each null-handling option leaves no nulls behind.

        Three scenarios are run one after another: ``drop`` (no nulls
        anywhere in the frame), ``fill_missing="mean"`` and
        ``fill_values={"Distance": 0}`` (no nulls in ``Distance``).

        Every scenario works on its own copy of the module-level
        ``dataframe``. Without the copies, a handler that modifies the frame
        in place during the first scenario would hand an already-clean frame
        to the later ones, and their assertions would pass without testing
        anything.
        """
        params = {"df": dataframe.copy(), "drop": True}
        handler = NullValuesHandler()
        handler.execute(params=params)
        assert not params["df"].isnull().values.any()

        params = {"df": dataframe.copy(), "fill_missing": "mean"}
        handler = NullValuesHandler()
        handler.execute(params=params)
        assert not params["df"].isnull().any()["Distance"]

        params = {"df": dataframe.copy(), "fill_values": {"Distance": 0}}
        handler = NullValuesHandler()
        handler.execute(params=params)
        assert not params["df"].isnull().any()["Distance"]
예제 #7
0
    def test_multiple_args(self):
        """Expect ``ArgumentsError`` whenever two handling options are combined.

        Covered combinations: ``drop`` + ``fill_missing``, ``drop`` +
        ``fill_values``, and ``fill_missing`` + ``fill_values``.
        """
        # NOTE(review): the previous version ran the drop + fill_values case
        # twice, byte for byte. The duplicate is removed here; possibly a
        # case with all three options was intended -- confirm.
        conflicting_options = (
            {"drop": True, "fill_missing": "mean"},
            {"drop": True, "fill_values": {"Test": "Tata"}},
            {"fill_missing": "mean", "fill_values": {"Test": "Tata"}},
        )
        for options in conflicting_options:
            # Build the handler outside the ``raises`` body so that only the
            # ``execute`` call can satisfy the expectation.
            handler = NullValuesHandler()
            with pytest.raises(ArgumentsError):
                handler.execute({"df": dataframe, **options})
예제 #8
0
def test_multiple_args(test_input):
    """Expect ``ArgumentsError`` when ``execute`` is given ``test_input``."""
    expected_failure = pytest.raises(ArgumentsError)
    with expected_failure:
        NullValuesHandler().execute(params=test_input)
예제 #9
0
def test_output(test_input3):
    """Check that ``Distance`` has no nulls in either frame after ``execute``."""
    NullValuesHandler().execute(params=test_input3)
    for key in ("train_df", "test_df"):
        columns_with_nulls = test_input3[key].isnull().any()
        assert not columns_with_nulls["Distance"]
예제 #10
0
def test_incorrect_input_type(error, test_input):
    """Expect ``execute`` to raise ``error`` when given ``test_input``.

    Both arguments come from outside this function (presumably a
    parametrization or fixtures that are not visible here).
    """
    expected_failure = pytest.raises(error)
    with expected_failure:
        NullValuesHandler().execute(params=test_input)
예제 #11
0
def test_null_dataframe():
    """Expect ``ValueError`` when ``execute`` receives an empty params dict."""
    empty_params = {}
    with pytest.raises(ValueError):
        NullValuesHandler().execute(empty_params)
예제 #12
0
    def test_incorrect_input_type(self):
        """Pass one bad argument at a time and check the raised exception.

        ``TypeError`` is expected for a NumPy array as ``df``, a string as
        ``drop``, an int as ``fill_missing`` and a list as ``fill_values``.
        ``ArgumentsError`` is expected for ``fill_missing="sum"`` and for
        ``fill_values={"Label": 10}``.
        """
        # The ``df`` argument: a NumPy array.
        random_array = np.random.random((5, 5))
        with pytest.raises(TypeError):
            NullValuesHandler().execute({"df": random_array})

        # The ``drop`` argument: a string.
        self.dataframe = pd.read_csv("datasets/encoding/testnew.csv")
        bad_drop = {"df": self.dataframe, "drop": "nice"}
        with pytest.raises(TypeError):
            NullValuesHandler().execute(bad_drop)

        # NOTE(review): the remaining cases use the module-level
        # ``dataframe`` rather than ``self.dataframe`` -- confirm intended.

        # The ``fill_missing`` argument: an int, then the string "sum".
        with pytest.raises(TypeError):
            NullValuesHandler().execute({"df": dataframe, "fill_missing": 3})

        with pytest.raises(ArgumentsError):
            NullValuesHandler().execute(
                {"df": dataframe, "fill_missing": "sum"}
            )

        # The ``fill_values`` argument: a list, then a dict keyed "Label".
        list_fill = [5]
        with pytest.raises(TypeError):
            NullValuesHandler().execute(
                {"df": dataframe, "fill_values": list_fill}
            )

        label_fill = {"Label": 10}
        with pytest.raises(ArgumentsError):
            NullValuesHandler().execute(
                {"df": dataframe, "fill_values": label_fill}
            )
예제 #13
0
 def test_none_args(self):
     """Expect ``ArgumentsError`` when only ``df`` is passed to ``execute``."""
     frame_only = {"df": dataframe}
     with pytest.raises(ArgumentsError):
         NullValuesHandler().execute(frame_only)
예제 #14
0
 def test_drop_col(self):
     """Check that ``drop`` with ``column_list`` removes the "Distance" column."""
     params = dict(df=dataframe, drop=True, column_list=["Distance"])
     NullValuesHandler().execute(params=params)
     remaining_columns = params["df"].columns
     assert "Distance" not in remaining_columns
예제 #15
0
 def test_incorrect_input_type(self):
     """Expect ``TypeError`` when ``drop`` is given as a string."""
     self.dataframe = pd.read_csv("datasets/encoding/testnew.csv")
     bad_drop = {"df": self.dataframe, "drop": "nice"}
     with pytest.raises(TypeError):
         NullValuesHandler().execute(bad_drop)
예제 #16
0
def test_output(test_input1):
    """Check that ``Distance`` in ``df`` has no nulls after ``execute``."""
    NullValuesHandler().execute(params=test_input1)
    columns_with_nulls = test_input1["df"].isnull().any()
    assert not columns_with_nulls["Distance"]