def test_no_fit(self, iris_test_data, iris_edge_list):
    """Predicting with a classifier that was never fitted raises ValueError."""
    features = iris_test_data.copy()
    # Remove the target column in place; only the features are predicted on.
    features.pop("sepal length (cm)")

    unfitted = BayesianNetworkClassifier(iris_edge_list)
    expected_message = "No CPDs found. The model has not been fitted"
    with pytest.raises(ValueError, match=expected_message):
        unfitted.predict(features)
    def test_predict_fixed(self, iris_test_data, iris_edge_list):
        """Fit with fixed split points and check predictions on the same data.

        Every feature column is discretised with the "unsupervised" algorithm
        using the "fixed" method and a single split point per column. The
        predictions for the training rows must equal the stored reference
        labels once reshaped into 15 rows.
        """
        split_points = {
            "sepal width (cm)": [3],
            "petal length (cm)": [3.7],
            "petal width (cm)": [1.2],
        }
        fixed_kwargs = {
            column: {"method": "fixed", "numeric_split_points": points}
            for column, points in split_points.items()
        }
        algorithms = dict.fromkeys(split_points, "unsupervised")

        features = iris_test_data.copy()
        # pop() removes the target column in place and hands it back.
        target = features.pop("sepal length (cm)")

        model = BayesianNetworkClassifier(
            iris_edge_list,
            discretiser_kwargs=fixed_kwargs,
            discretiser_alg=algorithms,
        )
        model.fit(features, target)
        predicted = model.predict(features)

        expected = np.array([
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [2, 2, 2, 1, 1, 1, 2, 0, 1, 1],
            [0, 2, 1, 1, 0, 2, 2, 1, 1, 1],
            [2, 1, 1, 1, 1, 2, 1, 2, 1, 0],
            [1, 1, 1, 1, 2, 2, 2, 1, 2, 1],
            [1, 2, 1, 0, 1, 2, 1, 1, 0, 1],
            [2, 1, 2, 1, 2, 2, 1, 1, 1, 2],
            [2, 1, 2, 1, 1, 2, 2, 2, 1, 1],
            [2, 1, 1, 1, 2, 2, 1, 2, 1, 2],
            [1, 2, 1, 1, 1, 2, 2, 2, 2, 2],
            [2, 2, 1, 2, 2, 2, 1, 2, 2, 2],
        ])
        assert np.array_equal(predicted.reshape(15, -1), expected)
    def test_dt_discretiser(self, iris_test_data, iris_edge_list):
        """Fit with the "tree" discretiser and check predictions.

        Each feature column uses the "tree" algorithm with ``max_depth=2`` and
        a fixed ``random_state``. Predictions on the training rows, reshaped
        into 15 rows, must match the stored reference labels.
        """
        feature_columns = (
            "sepal width (cm)",
            "petal length (cm)",
            "petal width (cm)",
        )
        # A separate kwargs dict per column, as in a hand-written literal.
        tree_kwargs = {
            column: {"max_depth": 2, "random_state": 2020}
            for column in feature_columns
        }
        algorithms = {column: "tree" for column in feature_columns}

        features = iris_test_data.copy()
        # pop() removes the target column in place and hands it back.
        target = features.pop("sepal length (cm)")

        model = BayesianNetworkClassifier(
            iris_edge_list,
            discretiser_kwargs=tree_kwargs,
            discretiser_alg=algorithms,
        )
        model.fit(features, target)
        predicted = model.predict(features)

        expected = np.array([
            [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [2, 2, 2, 1, 1, 1, 2, 0, 1, 1],
            [0, 1, 1, 1, 1, 2, 1, 1, 1, 1],
            [2, 1, 1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 2, 1, 1, 1],
            [1, 1, 1, 0, 1, 1, 1, 1, 0, 1],
            [2, 1, 2, 2, 2, 2, 1, 2, 2, 2],
            [2, 2, 2, 1, 1, 2, 2, 2, 2, 1],
            [2, 1, 2, 1, 2, 2, 1, 1, 2, 2],
            [2, 2, 2, 1, 2, 2, 2, 2, 1, 2],
            [2, 2, 1, 2, 2, 2, 1, 2, 2, 1],
        ])
        assert np.array_equal(predicted.reshape(15, -1), expected)
 def test_default_params(self):
     """A classifier built from edges alone reports the default parameters.

     ``get_params()`` must show empty discretiser settings and the
     "BayesianEstimator" method with a "K2" prior for probability fitting.
     """
     edges = [
         ("b", "a"),
         ("b", "c"),
         ("d", "a"),
         ("d", "c"),
         ("d", "b"),
         ("e", "c"),
         ("e", "b"),
     ]
     reported = BayesianNetworkClassifier(edges).get_params()

     assert reported["discretiser_alg"] == {}
     probability_kwargs = reported["probability_kwargs"]
     assert probability_kwargs["method"] == "BayesianEstimator"
     assert probability_kwargs["bayes_prior"] == "K2"
     assert reported["discretiser_kwargs"] == {}
 def test_missing_kwargs(self, iris_edge_list):
     """Mismatched keys between algorithms and kwargs raise ValueError.

     ``discretiser_alg`` names three columns while ``discretiser_kwargs``
     covers only two of them, so constructing the classifier must fail.
     """
     algorithms = {
         "sepal width (cm)": "tree",
         "petal length (cm)": "tree",
         "petal width (cm)": "mdlp",
     }
     # Deliberately omits "petal width (cm)".
     incomplete_kwargs = {
         column: {"min_depth": 0, "random_state": 2020}
         for column in ("sepal width (cm)", "petal length (cm)")
     }
     expected_message = (
         "discretiser_alg and discretiser_kwargs should have the same keys")

     with pytest.raises(ValueError, match=expected_message):
         BayesianNetworkClassifier(
             iris_edge_list,
             discretiser_alg=algorithms,
             discretiser_kwargs=incomplete_kwargs,
         )
    def test_shuffled_data(self, iris_test_data, iris_edge_list):
        """Fit and predict on a seeded random half of the rows.

        The data is sampled with ``frac=0.5`` and a fixed ``random_state``, so
        the rows arrive out of their original order. Predictions must contain
        no NaN and, reshaped to 5 x 15, equal the stored reference labels.
        """
        feature_columns = (
            "sepal width (cm)",
            "petal length (cm)",
            "petal width (cm)",
        )
        # A separate kwargs dict per column, as in a hand-written literal.
        tree_kwargs = {
            column: {"max_depth": 2, "random_state": 2020}
            for column in feature_columns
        }
        algorithms = {column: "tree" for column in feature_columns}

        features = iris_test_data.copy().sample(frac=0.5, random_state=2020)
        # pop() removes the target column in place and hands it back.
        target = features.pop("sepal length (cm)")

        model = BayesianNetworkClassifier(
            iris_edge_list,
            discretiser_kwargs=tree_kwargs,
            discretiser_alg=algorithms,
        )
        model.fit(features, target)
        predicted = model.predict(features)

        expected = np.array([
            [2, 0, 1, 2, 2, 1, 2, 0, 0, 0, 2, 1, 0, 2, 2],
            [0, 1, 2, 2, 0, 0, 1, 2, 0, 2, 1, 1, 2, 0, 0],
            [2, 0, 0, 0, 2, 0, 0, 1, 0, 1, 0, 2, 1, 0, 2],
            [2, 1, 2, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 1, 1],
            [0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 0],
        ])
        assert not np.isnan(predicted).any()
        assert np.array_equal(expected, predicted.reshape(5, 15))
    def test_invalid_algorithm(self, iris_edge_list):
        """Unknown discretiser algorithm names raise KeyError on construction."""
        algorithms = {
            "sepal width (cm)": "invalid",
            "petal length (cm)": "invalid",
            "petal width (cm)": "mdlp",
        }
        expected_message = "Some discretiser algorithms are not supported"

        with pytest.raises(KeyError, match=expected_message):
            BayesianNetworkClassifier(
                iris_edge_list,
                discretiser_alg=algorithms,
            )
    def test_return_probability(self, iris_test_data, iris_edge_list):
        """Check the probabilities returned when ``return_prob=True``.

        The classifier is fitted with a fixed split point on every feature
        column and then asked to predict the first row only. Iterating the
        result must yield three items (the column labels, assuming a
        DataFrame is returned), and the values stored under
        ``sepal length (cm)_0`` and ``sepal length (cm)_1`` are compared with
        reference numbers to within an absolute tolerance of 1e-3.
        """
        df = iris_test_data.copy()

        discretiser_params = {
            "sepal width (cm)": {
                "method": "fixed",
                "numeric_split_points": [3]
            },
            "petal length (cm)": {
                "method": "fixed",
                "numeric_split_points": [3.7]
            },
            "petal width (cm)": {
                "method": "fixed",
                "numeric_split_points": [1.2]
            },
        }

        label = df["sepal length (cm)"]
        df.drop(["sepal length (cm)"], axis=1, inplace=True)
        clf = BayesianNetworkClassifier(
            iris_edge_list,
            discretiser_kwargs=discretiser_params,
            discretiser_alg={
                "sepal width (cm)": "unsupervised",
                "petal length (cm)": "unsupervised",
                "petal width (cm)": "unsupervised",
            },
            return_prob=True,
        )
        clf.fit(df, label)
        # Predict a single row so each output column holds exactly one value.
        output = clf.predict(df.iloc[0:1])
        assert len(list(output)) == 3
        # Index the single value out explicitly. Handing math.isclose a
        # one-element array relies on NumPy's implicit array-to-scalar
        # conversion, which is deprecated since NumPy 1.25.
        assert math.isclose(output["sepal length (cm)_0"].values[0],
                            0.764706,
                            abs_tol=1e-3)
        assert math.isclose(output["sepal length (cm)_1"].values[0],
                            0.215686,
                            abs_tol=1e-3)