Ejemplo n.º 1
0
    def fit(
        self,
        X: numpy.ndarray,
        y: numpy.ndarray,
        search_spaces: numpy.ndarray,
        search_spaces_is_categorical: List[bool],
    ) -> None:
        """Fit the forest on encoded inputs and wrap every estimator in a ``_FanovaTree``.

        ``X`` and ``search_spaces`` are first handed, together with
        ``search_spaces_is_categorical``, to
        ``_CategoricalFeaturesOneHotEncoder.fit_transform``. The forest and
        the trees only ever see the pair returned by that call.

        Args:
            X: Array with one row per entry of ``y`` and one column per row
                of ``search_spaces``.
            y: Targets; its first dimension must equal that of ``X``.
            search_spaces: Array with exactly two columns and one row per
                column of ``X`` (presumably lower/upper bounds -- confirm
                against ``_FanovaTree``).
            search_spaces_is_categorical: One flag per column of ``X``.

        Raises:
            AssertionError: If the shapes above are inconsistent.
            RuntimeError: If every resulting tree reports zero variance.
        """
        assert X.shape[0] == y.shape[0]
        assert X.shape[1] == search_spaces.shape[0]
        assert X.shape[1] == len(search_spaces_is_categorical)
        assert search_spaces.shape[1] == 2

        one_hot = _CategoricalFeaturesOneHotEncoder()
        encoded = one_hot.fit_transform(X, search_spaces, search_spaces_is_categorical)
        encoded_X, encoded_spaces = encoded

        self._forest.fit(encoded_X, y)

        # One fANOVA tree per fitted estimator, all sharing the encoded spaces.
        fanova_trees = []
        for estimator in self._forest.estimators_:
            fanova_trees.append(_FanovaTree(estimator.tree_, encoded_spaces))

        self._trees = fanova_trees
        self._features_to_raw_features = one_hot.features_to_raw_features
        # Reset to an empty mapping on every fit.
        self._variances = {}

        # Importances cannot be assessed unless at least one tree has
        # non-zero variance. All-zero variance could occur if for instance
        # `X.shape[0] == 1`.
        if not any(fanova_tree.variance != 0 for fanova_tree in fanova_trees):
            raise RuntimeError("Encountered zero total variance in all trees.")
Ejemplo n.º 2
0
def tree() -> _FanovaTree:
    """Return a ``_FanovaTree`` built around a mocked five-node, three-feature tree.

    The mock exposes the attribute names of scikit-learn's low-level tree
    object. Read with that layout (index = node id, ``-1`` = "none"):

    * node 0 tests feature 1 at threshold 0.5 -> left child 1, right child 4
    * node 1 tests feature 2 at threshold 1.5 -> left child 2, right child 3
    * nodes 2, 3 and 4 have no children and carry values 0.1, 0.2 and 0.5

    The search space is ``[0, 1]`` for features 0 and 1 and ``[0, 2]`` for
    feature 2.
    """
    # Per-node arrays, one entry per node id.
    node_arrays = {
        "feature": [1, 2, -1, -1, -1],
        "children_left": [1, 2, -1, -1, -1],
        "children_right": [4, 3, -1, -1, -1],
        "value": [-1.0, -1.0, 0.1, 0.2, 0.5],
        "threshold": [0.5, 1.5, -1.0, -1.0, -1.0],
    }
    # Extra keyword arguments to Mock are set as attributes on the mock.
    mocked_tree = Mock(n_features=3, node_count=5, **node_arrays)

    bounds = numpy.array([[0.0, 1.0], [0.0, 1.0], [0.0, 2.0]])

    return _FanovaTree(tree=mocked_tree, search_spaces=bounds)
Ejemplo n.º 3
0
    def fit(
        self,
        X: numpy.ndarray,
        y: numpy.ndarray,
        search_spaces: numpy.ndarray,
        column_to_encoded_columns: List[numpy.ndarray],
    ) -> None:
        """Fit the forest on ``X``/``y`` and wrap every estimator in a ``_FanovaTree``.

        Args:
            X: Array with one row per entry of ``y`` and one column per row
                of ``search_spaces``.
            y: Targets; its first dimension must equal that of ``X``.
            search_spaces: Array with exactly two columns and one row per
                column of ``X`` (presumably lower/upper bounds -- confirm
                against ``_FanovaTree``).
            column_to_encoded_columns: Stored on the instance as-is; it is
                not inspected or validated here.

        Raises:
            AssertionError: If the shapes above are inconsistent.
            RuntimeError: If every resulting tree reports zero variance.
        """
        assert X.shape[0] == y.shape[0]
        assert X.shape[1] == search_spaces.shape[0]
        assert search_spaces.shape[1] == 2

        self._forest.fit(X, y)

        # One fANOVA tree per fitted estimator, all sharing the same spaces.
        fanova_trees = []
        for estimator in self._forest.estimators_:
            fanova_trees.append(_FanovaTree(estimator.tree_, search_spaces))

        self._trees = fanova_trees
        self._column_to_encoded_columns = column_to_encoded_columns
        # Reset to an empty mapping on every fit.
        self._variances = {}

        # Importances cannot be assessed unless at least one tree has
        # non-zero variance. All-zero variance could occur if for instance
        # `X.shape[0] == 1`.
        if not any(fanova_tree.variance != 0 for fanova_tree in fanova_trees):
            raise RuntimeError("Encountered zero total variance in all trees.")