Beispiel #1
0
    def test_leaf_node_counts_correct_2(self, xgboost_2_split_1_tree):
        """Test the leaf_node_counts attribute has the correct values with 2nd hand workable example."""

        # rules for xgboost_2_split_1_tree are as follows;
        # leaf 1 - if (f0 < 0.5)
        # leaf 3 - if (f0 > 0.5) & (f1 < 0.5)
        # leaf 4 - if (f0 > 0.5) & (f1 > 0.5)

        # for this dataset the leaf nodes for each row are inline below;
        xgb_data = xgb.DMatrix(
            data=np.array(
                [
                    [1, 1],  # leaf 4
                    [1, 0],  # leaf 3
                    [0, 1],  # leaf 1
                    [0, 0],  # leaf 1
                    [1, 0],  # leaf 3
                    [0, 1],  # leaf 1
                    [0, 0],  # leaf 1
                ]
            )
        )

        # therefore the leaf_node_counts attribute for a single tree
        # should be;
        expected_leaf_node_counts = [{1: 4, 3: 2, 4: 1}]

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_2_split_1_tree)

        confo_model._calibrate_leaf_node_counts(xgb_data)

        assert (
            confo_model.leaf_node_counts == expected_leaf_node_counts
        ), "leaf_node_counts not calculated correctly"
Beispiel #2
0
    def test_leaf_node_counts_excludes_non_visited_nodes(self, xgboost_2_split_2_tree):
        """Test that leaf_node_counts does not include nodes that were not visited when
        predicting on data.
        """

        # note, this dataset does not include any rows that visit tree 1, leaf 1
        # for this dataset the leaf nodes for each row are inline below;
        xgb_data = xgb.DMatrix(
            data=np.array(
                [
                    [1, 1],  # tree 1, leaf 2 > tree 2, leaf 2
                    [1, 1],  # tree 1, leaf 2 > tree 2, leaf 2
                    [1, 1],  # tree 1, leaf 2 > tree 2, leaf 2
                    [1, 1],  # tree 1, leaf 2 > tree 2, leaf 2
                    [1, 0],  # tree 1, leaf 2 > tree 2, leaf 1
                    [1, 0],  # tree 1, leaf 2 > tree 2, leaf 1
                    [1, 0],  # tree 1, leaf 2 > tree 2, leaf 1
                    [1, 0],  # tree 1, leaf 2 > tree 2, leaf 1
                ]
            )
        )

        # therefore the leaf_node_counts attribute for a single tree
        # should be;
        expected_leaf_node_counts = [{2: 8}, {1: 4, 2: 4}]

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_2_split_2_tree)

        confo_model._calibrate_leaf_node_counts(xgb_data)

        assert (
            confo_model.leaf_node_counts == expected_leaf_node_counts
        ), "leaf_node_counts not calculated correctly when nodes not visited"
Beispiel #3
0
    def test_predict_call(self, mocker, dmatrix_2x1_with_label, xgboost_1_split_1_tree):
        """Test that the output from xgb.Booster.predict with ntree_limit = best_iteration + 1
        is returned from the method.
        """

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        confo_model.calibrate(dmatrix_2x1_with_label)

        predict_return_value = np.array([200, 101])

        mocked = mocker.patch.object(
            xgb.Booster, "predict", return_value=predict_return_value
        )

        results = confo_model._generate_predictions(dmatrix_2x1_with_label)

        assert (
            mocked.call_count == 1
        ), "incorrect number of calls to xgb.Booster.predict"

        np.testing.assert_array_equal(results, predict_return_value)

        call_args = mocked.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        assert call_pos_args == (
            dmatrix_2x1_with_label,
        ), "positional args incorrect in call to xgb.Booster.predict"

        assert call_kwargs == {
            "ntree_limit": xgboost_1_split_1_tree.best_iteration + 1
        }, "positional args incorrect in call to xgb.Booster.predict"
Beispiel #4
0
    def test_leaf_node_counts_correct_1(
        self, xgboost_2_split_1_tree, dmatrix_4x2_with_label
    ):
        """Test the leaf_node_counts attribute has the correct values with hand workable example."""

        # rules for xgboost_2_split_1_tree are as follows;
        # leaf 1 - if (f0 < 0.5)
        # leaf 3 - if (f0 > 0.5) & (f1 < 0.5)
        # leaf 4 - if (f0 > 0.5) & (f1 > 0.5)

        # there for the dmatrix_4x2_with_label data will be mapped to;
        # [1, 1] - leaf 4
        # [1, 0] - leaf 3
        # [0, 1] - leaf 1
        # [0, 0] - leaf 1

        # therefore the leaf_node_counts attribute for a single tree
        # should be;
        expected_leaf_node_counts = [{1: 2, 3: 1, 4: 1}]

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_2_split_1_tree)

        confo_model._calibrate_leaf_node_counts(dmatrix_4x2_with_label)

        assert (
            confo_model.leaf_node_counts == expected_leaf_node_counts
        ), "leaf_node_counts not calculated correctly"
Beispiel #5
0
    def test_data_type_exception(self, xgboost_1_split_1_tree):
        """Test an exception is raised if data is not a xgb.DMatrix object."""

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        with pytest.raises(
            TypeError,
            match=re.escape(
                f"data is not in expected types {[xgb.DMatrix]}, got {str}"
            ),
        ):

            confo_model.calibrate("abcd")
Beispiel #6
0
    def test_data_type_exception(self, dmatrix_2x1_with_label, xgboost_1_split_1_tree):
        """Test an exception is raised if data is not a xgb.DMatrix object."""

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        with pytest.raises(
            TypeError,
            match=re.escape(
                f"data is not in expected types {[xgb.DMatrix]}, got {list}"
            ),
        ):

            confo_model._generate_leaf_node_predictions([])
Beispiel #7
0
    def test_no_leaf_node_counts_attribute_exception(
        self, dmatrix_2x1_with_label, xgboost_1_split_1_tree
    ):
        """Test an exception is raised if leaf_node_counts attribute is not present."""

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        assert not hasattr(
            confo_model, "leaf_node_counts"
        ), "XGBoosterLeafNodeScaledConformalPredictor has leaf_node_counts attribute prior to running calibrate"

        with pytest.raises(
            AttributeError,
            match="XGBoosterLeafNodeScaledConformalPredictor does not have leaf_node_counts"
            " attribute, run calibrate first.",
        ):

            confo_model.predict_with_interval(dmatrix_2x1_with_label)
Beispiel #8
0
    def test_test_leaf_node_counts_correct(self, dataset, params_dict, request):
        """Test leaf_node_counts is calculated correctly - on larger models that require automated
        calculation to check against what is produced by _calibrate_leaf_node_counts.
        """

        # this is used to parameterise which fixture to use to provide the data
        dataset = request.getfixturevalue(dataset)

        # build model with params passed
        model = xgb.train(
            params=params_dict,
            dtrain=dataset[0],
            num_boost_round=500,
            evals=[(dataset[1], "validate")],
            early_stopping_rounds=5,
            verbose_eval=False,
        )

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(model)

        # set leaf_node_counts attribute
        # note, we are using a different dataset to training so not guaranteed to have
        # every leaf node in the model visited
        confo_model._calibrate_leaf_node_counts(dataset[2])

        # now calculate values (leaf node counts) from scratch
        # first generate leaf node predictions
        leaf_node_predictions = model.predict(
            data=dataset[2], pred_leaf=True, ntree_limit=model.best_iteration + 1
        )

        # loop through each column i.e. tree
        for column_no in range(leaf_node_predictions.shape[1]):

            # these are the counts we expected to see in confo_model.leaf_node_counts[column_no]
            # unless a particular node was not visited at all in the dataset
            counts = (
                pd.Series(leaf_node_predictions[:, column_no]).value_counts().to_dict()
            )

            assert (
                confo_model.leaf_node_counts[column_no] == counts
            ), f"incorrect leaf node count for tree {column_no}"
Beispiel #9
0
    def test_model_type_exception(self):
        """Test an exception is raised if model is not a xgb.Booster object."""

        with pytest.raises(
            TypeError,
            match=re.escape(
                f"model is not in expected types {[xgb.Booster]}, got {tuple}"
            ),
        ):

            XGBoosterLeafNodeScaledConformalPredictor((1, 2, 3))
Beispiel #10
0
    def test_output_2d(self, mocker, dmatrix_2x1_with_label, xgboost_1_split_1_tree):
        """Test the array returned from _generate_leaf_node_predictions is a 2d array
        even if the output from predict is 1d.
        """

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        confo_model.calibrate(dmatrix_2x1_with_label)

        # set the return value from predict to be a 1d array
        predict_return_value = np.array([200, 101])

        mocker.patch.object(xgb.Booster, "predict", return_value=predict_return_value)

        results = confo_model._generate_leaf_node_predictions(dmatrix_2x1_with_label)

        expected_results = predict_return_value.reshape(
            predict_return_value.shape[0], 1
        )

        np.testing.assert_array_equal(results, expected_results)
Beispiel #11
0
    def test_super_predict_with_interval_call(
        self, mocker, dmatrix_2x1_with_label, xgboost_1_split_1_tree
    ):
        """Test that LeafNodeScaledConformalPredictor.predict_with_interval is called and the
        outputs of this are returned from the method.
        """

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        confo_model.calibrate(dmatrix_2x1_with_label)

        predict_return_value = np.array([200, 101, 1234])

        mocked = mocker.patch.object(
            pitci.base.LeafNodeScaledConformalPredictor,
            "predict_with_interval",
            return_value=predict_return_value,
        )

        results = confo_model.predict_with_interval(dmatrix_2x1_with_label)

        # test output of predict_with_interval is the return value of
        # LeafNodeScaledConformalPredictor.predict_with_interval
        np.testing.assert_array_equal(results, predict_return_value)

        assert (
            mocked.call_count == 1
        ), "incorrect number of calls to super().predict_with_interval"

        call_args = mocked.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        assert (
            call_pos_args == ()
        ), "positional args incorrect in call to LeafNodeScaledConformalPredictor.predict_with_interval"

        assert call_kwargs == {
            "data": dmatrix_2x1_with_label
        }, "keyword args incorrect in call to LeafNodeScaledConformalPredictor.predict_with_interval"
Beispiel #12
0
    def test_super_calibrate_call_response_passed(
        self, mocker, dmatrix_2x1_with_label, xgboost_1_split_1_tree
    ):
        """Test XGBoosterLeafNodeScaledConformalPredictor.calibrate call when response is passed."""

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        mocked = mocker.patch.object(
            pitci.base.LeafNodeScaledConformalPredictor, "calibrate"
        )

        response_array = np.array([4, 5])

        confo_model.calibrate(
            data=dmatrix_2x1_with_label, alpha=0.5, response=response_array
        )

        assert (
            mocked.call_count == 1
        ), "incorrect number of calls to LeafNodeScaledConformalPredictor.calibrate"

        call_args = mocked.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        assert (
            call_pos_args == ()
        ), "positional args incorrect in call to LeafNodeScaledConformalPredictor.calibrate"

        assert (
            call_kwargs["alpha"] == 0.5
        ), "alpha incorrect in call to LeafNodeScaledConformalPredictor.calibrate"

        np.testing.assert_array_equal(call_kwargs["response"], response_array)

        assert (
            call_kwargs["data"] == dmatrix_2x1_with_label
        ), "data incorrect in call to LeafNodeScaledConformalPredictor.calibrate"
Beispiel #13
0
    def test_attributes_set(self, xgboost_1_split_1_tree):
        """Test that SUPPORTED_OBJECTIVES, version and model attributes are set."""

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        assert (
            confo_model.__version__ == pitci.__version__
        ), "__version__ attribute not set to package version value"

        assert (
            confo_model.model is xgboost_1_split_1_tree
        ), "model attribute not set with the value passed in init"

        assert (
            confo_model.SUPPORTED_OBJECTIVES
            == pitci.xgboost.SUPPORTED_OBJECTIVES_ABS_ERROR
        ), "SUPPORTED_OBJECTIVES attribute incorrect"
Beispiel #14
0
    def test_data_type_exception(self, dmatrix_2x1_with_label, xgboost_1_split_1_tree):
        """Test an exception is raised if data is not a xgb.DMatrix object."""

        confo_model = XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        confo_model.calibrate(dmatrix_2x1_with_label)

        with pytest.raises(
            TypeError,
            match=re.escape(
                f"data is not in expected types {[xgb.DMatrix]}, got {pd.DataFrame}"
            ),
        ):

            confo_model.predict_with_interval(pd.DataFrame())
Beispiel #15
0
    def test_check_objective_supported_called(self, mocker, xgboost_1_split_1_tree):
        """Test that check_objective_supported is called in init."""

        mocked = mocker.patch.object(pitci.xgboost, "check_objective_supported")

        XGBoosterLeafNodeScaledConformalPredictor(xgboost_1_split_1_tree)

        assert (
            mocked.call_count == 1
        ), "check_objective_supported not called (once) in init"

        call_args = mocked.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        assert call_pos_args == (
            xgboost_1_split_1_tree,
            pitci.xgboost.SUPPORTED_OBJECTIVES_ABS_ERROR,
        ), "positional args in check_objective_supported call not correct"

        assert (
            call_kwargs == {}
        ), "keyword args in check_objective_supported call not correct"