예제 #1
0
    def _compute_sklearn_tree_importances(self,
                                          estimator,
                                          X=None,
                                          weighted=True,
                                          normalize=True):
        """Compute scikit-learn style MDI importances for one tree-like model

        Every entry of the node list that is a ``Node`` adds its impurity
        importance to the feature it splits on. The accumulated values are
        then divided by the sample count stored on the first node and,
        optionally, normalized.

        Parameters
        ----------
        estimator : object
            tree-like model forwarded to ``get_sklearn_nodes_from``
        X : np.ndarray, pd.DataFrame or None, optional
            data used to compute MDI; when None use original dataset, by default None
        weighted : bool, optional
            if MDI should be computed based on weighted node samples, by default True
        normalize : bool, optional
            if MDI should be normalized after computing, by default True

        Returns
        -------
        np.ndarray
            MDI importances, one value per feature, in feature order

        """
        mdi = np.zeros(self.n_features_)
        tree_nodes = get_sklearn_nodes_from(estimator, X=X, weighted=weighted)

        for node in tree_nodes:
            # Entries that are not Node instances have no split to credit.
            if not isinstance(node, Node):
                continue
            left_child = tree_nodes[node.left]
            right_child = tree_nodes[node.right]
            mdi[node.feature] += self._compute_impurity_importance_from(
                node, left_child, right_child)

        # Scale by the sample count recorded on the first node of the list.
        mdi /= tree_nodes[0].n_node_samples

        return _normalize(mdi, axis=None) if normalize else mdi
예제 #2
0
    def _compute_sklearn_forest_importances(self,
                                            X=None,
                                            weighted=True,
                                            normalize=True):
        """Compute MDI importances following scikit-learn API on a forest-like model

        Parameters
        ----------
        X : np.ndarray, pd.DataFrame or None, optional
            data used to compute MDI; when None use original dataset, by default None
        weighted : bool, optional
            if MDI should be computed based on weighted node samples, by default True
        normalize : bool, optional
            if MDI should be normalized after computing, by default True

        Returns
        -------
        np.ndarray
            MDI importances in same order than the features

        """
        trees = self.estimator.estimators_
        importances = np.zeros(self.n_features_)
        n_estimators = 0

        for e in trees:
            if isinstance(e, BaseDecisionTree):
                n_estimators += 1
                importances += self._compute_sklearn_tree_importances(
                    e, X=X, weighted=weighted, normalize=normalize)
            else:  # specific case of sklearn gradient boosting models
                for e_ in e:
                    if e_.tree_.node_count > 1:
                        n_estimators += 1
                        importances += self._compute_sklearn_tree_importances(
                            e_, X=X, weighted=weighted, normalize=False)

        importances /= n_estimators
        if normalize:
            importances = _normalize(importances, axis=None)

        return importances
예제 #3
0
def test__normalize():
    """_normalize maps [1, 2, 3, 4] to [.1, .2, .3, .4] when axis is None."""
    raw = np.array([1, 2, 3, 4])
    expected = np.array([.1, .2, .3, .4])

    actual = _normalize(raw, axis=None)

    np.testing.assert_allclose(actual, expected)
예제 #4
0
def test__normalize_error():
    """_normalize raises TypeError when given an array of strings."""
    letters = np.array(['a', 'b', 'c', 'd'])

    with pytest.raises(TypeError):
        _normalize(letters, axis=None)
예제 #5
0
def test__normalize_zero_sum():
    """_normalize returns all zeros (not NaN) for an all-zero input."""
    zeros_in = np.array([0, 0, 0, 0])
    expected = np.array([.0, .0, .0, .0])

    actual = _normalize(zeros_in, axis=None)

    np.testing.assert_allclose(actual, expected)