def _compute_sklearn_tree_importances(self, estimator, X=None, weighted=True, normalize=True):
    """Compute MDI importances following scikit-learn API on a tree-like model

    Parameters
    ----------
    estimator : object
        tree-like model handed to ``get_sklearn_nodes_from`` to obtain
        its nodes
    X : np.ndarray, pd.DataFrame or None, optional
        data used to compute MDI; when None use original dataset,
        by default None
    weighted : bool, optional
        if MDI should be computed based on weighted node samples,
        by default True
    normalize : bool, optional
        if MDI should be normalized after computing, by default True

    Returns
    -------
    np.ndarray
        MDI importances in the same order as the features
    """
    mdi = np.zeros(self.n_features_)
    tree_nodes = get_sklearn_nodes_from(estimator, X=X, weighted=weighted)

    for node in tree_nodes:
        # Only ``Node`` instances contribute; any other entry is skipped
        # (presumably leaves -- confirm against get_sklearn_nodes_from).
        if not isinstance(node, Node):
            continue
        left_child = tree_nodes[node.left]
        right_child = tree_nodes[node.right]
        mdi[node.feature] += self._compute_impurity_importance_from(
            node, left_child, right_child)

    # Scale by the sample count stored on the first node
    # (presumably the root of the tree).
    mdi /= tree_nodes[0].n_node_samples

    return _normalize(mdi, axis=None) if normalize else mdi
def _compute_sklearn_forest_importances(self, X=None, weighted=True, normalize=True):
    """Compute MDI importances following scikit-learn API on a forest-like model

    The importances of every counted tree are summed and divided by the
    number of counted trees. When no tree is counted (``estimators_`` is
    empty, or every boosting stage only holds single-node trees), an
    all-zero vector is returned instead of dividing by zero.

    Parameters
    ----------
    X : np.ndarray, pd.DataFrame or None, optional
        data used to compute MDI; when None use original dataset,
        by default None
    weighted : bool, optional
        if MDI should be computed based on weighted node samples,
        by default True
    normalize : bool, optional
        if MDI should be normalized after computing, by default True

    Returns
    -------
    np.ndarray
        MDI importances in the same order as the features
    """
    trees = self.estimator.estimators_
    importances = np.zeros(self.n_features_)

    n_estimators = 0
    for e in trees:
        if isinstance(e, BaseDecisionTree):
            n_estimators += 1
            importances += self._compute_sklearn_tree_importances(
                e, X=X, weighted=weighted, normalize=normalize)
        else:
            # specific case of sklearn gradient boosting models:
            # each entry is itself an iterable of trees
            for e_ in e:
                # single-node trees are neither counted nor accumulated
                if e_.tree_.node_count > 1:
                    n_estimators += 1
                    # per-tree normalization is skipped here; only the
                    # aggregated vector is normalized below
                    importances += self._compute_sklearn_tree_importances(
                        e_, X=X, weighted=weighted, normalize=False)

    if n_estimators == 0:
        # Degenerate ensemble: nothing was accumulated, so dividing by
        # zero would only turn the zero vector into NaNs.
        return importances

    importances /= n_estimators

    if normalize:
        importances = _normalize(importances, axis=None)

    return importances
def test__normalize():
    """Check ``_normalize`` with ``axis=None`` on a strictly positive array.

    The input ``[1, 2, 3, 4]`` is expected to come back as
    ``[0.1, 0.2, 0.3, 0.4]``.
    """
    values = np.array([1, 2, 3, 4])
    expected = np.array([0.1, 0.2, 0.3, 0.4])

    np.testing.assert_allclose(_normalize(values, axis=None), expected)
def test__normalize_error():
    """Check that ``_normalize`` raises ``TypeError`` on an array of strings."""
    non_numeric = np.array(["a", "b", "c", "d"])

    with pytest.raises(TypeError):
        _normalize(non_numeric, axis=None)
def test__normalize_zero_sum():
    """Check ``_normalize`` with ``axis=None`` on an all-zero array.

    The expected output is an all-zero float array of the same length.
    """
    all_zeros = np.array([0, 0, 0, 0])
    expected = np.zeros(4)

    result = _normalize(all_zeros, axis=None)

    np.testing.assert_allclose(result, expected)