Example #1
    def get_tree_objects(self, tree_models, fields, classes):
        """Convert parsed PMML tree models into scikit-learn estimators.

        A list of tree models (a segmentation, e.g. a forest) becomes a
        list of ``DecisionTreeRegressor``; a single tree model becomes a
        ``DecisionTreeClassifier``.
        """
        trees = []
        for i, tree_model in enumerate(tree_models):
            if isinstance(tree_model, list):
                tree_inner = []
                for tree_mod in tree_model:
                    # skip trees without nodes, then read the comparison
                    # operator from the first split
                    main_node = tree_mod.get_Node()
                    all_node = main_node.get_Node()
                    if len(all_node) == 0:
                        continue
                    operator = all_node[0].get_SimplePredicate().get_operator()
                    tt = Tree(fields, [1], operator)  # [1]: placeholder class list for regression
                    tt.get_node_info(all_node)
                    tt.build_tree()
                    # mimic the attributes of a fitted scikit-learn estimator
                    model = DecisionTreeRegressor()
                    model.n_features = len(fields)
                    model.n_features_ = len(fields)
                    model.n_outputs_ = 1
                    model.n_outputs = 1
                    model.classes_ = np.array(classes)
                    model.tree_ = tt
                    tree_inner.append(model)
                trees.append(tree_inner)
            else:
                # a single tree model becomes a classifier
                main_node = tree_model.get_Node()
                all_node = main_node.get_Node()
                if len(all_node) == 0:
                    continue
                operator = all_node[0].get_SimplePredicate().get_operator()
                tt = Tree(fields, classes, operator)
                tt.get_node_info(all_node)
                tt.build_tree()
                model = DecisionTreeClassifier()
                model.n_features = len(fields)
                model.n_features_ = len(fields)
                model.n_outputs_ = 1
                model.n_outputs = 1
                model.classes_ = np.array(classes)
                model._estimator_type = (
                    'classifier' if len(classes) > 0 else 'regressor')
                model.tree_ = tt
                trees.append(model)
        return trees
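
A hypothetical usage sketch (not part of the original source): it assumes `converter` is an instance of the class defining `get_tree_objects` and that `tree_models`, the field names, and the class labels come from an already parsed PMML file.

import numpy as np

# hypothetical inputs: `converter` and `tree_models` are assumed to exist
trees = converter.get_tree_objects(
    tree_models, fields=['f0', 'f1'], classes=[0, 1])
clf = trees[0]                # a DecisionTreeClassifier for a single tree model
X = np.array([[0.5, 1.2]])    # one sample with two features
print(clf.predict(X))         # prediction routed through the rebuilt tree_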
Example #2
def digitize2tree(bins, right=False):
    """
    Builds a decision tree which returns the same result as
    `lambda x: numpy.digitize(x, bins, right=right)`
    (see :epkg:`numpy:digitize`).

    :param bins: array of bins. It has to be 1-dimensional and monotonic.
    :param right: indicates whether the intervals include the right
        or the left bin edge. The default behavior (`right=False`)
        means that the interval does not include the right edge.
        The left bin end is open in this case, i.e.
        `bins[i-1] <= x < bins[i]` is the default behavior for
        monotonically increasing bins.
    :return: decision tree

    .. note::
        The implementation of decision trees in :epkg:`scikit-learn`
        only allows one type of decision (`<=`). That's why the
        function throws an exception when `right=False`. However,
        this could be overcome by using :epkg:`ONNX`, where all
        kinds of decision rules are implemented. The default value
        for *right* remains *False* to follow the *numpy* API, even
        though this value raises an exception in *digitize2tree*.

    The following example shows what the tree looks like.

    .. runpython::
        :showcode:

        import numpy
        from sklearn.tree import export_text
        from mlinsights.mltree import digitize2tree

        x = numpy.array([0.2, 6.4, 3.0, 1.6])
        bins = numpy.array([0.0, 1.0, 2.5, 4.0, 7.0])
        expected = numpy.digitize(x, bins, right=True)
        tree = digitize2tree(bins, right=True)
        pred = tree.predict(x.reshape((-1, 1)))
        print("Comparison with numpy:")
        print(expected, pred)
        print("Tree:")
        print(export_text(tree, feature_names=['x']))

    See also example :ref:`l-example-digitize`.

    .. versionadded:: 0.4
    """
    if not right:
        raise RuntimeError(
            "right must be True, not right=%r" % right)
    # bins must be monotonic; infer the direction from the first pair
    ascending = len(bins) <= 1 or bins[0] < bins[1]

    if not ascending:
        # build the tree on the reversed bins, then remap leaf indices
        bins2 = bins[::-1]
        cl = digitize2tree(bins2, right=right)
        n = len(bins)
        for i in range(cl.tree_.value.shape[0]):
            cl.tree_.value[i, 0, 0] = n - cl.tree_.value[i, 0, 0]
        return cl

    # scikit-learn's internal Tree(n_features, n_classes, n_outputs)
    tree = Tree(1, numpy.array([1], dtype=numpy.intp), 1)
    values = []  # output value per node; numpy.nan marks internal splits
    UNUSED = numpy.nan
    n_nodes = []

    def add_root(index):
        if index < 0 or index >= len(bins):
            raise IndexError(  # pragma: no cover
                "Unexpected index %d / len(bins)=%d." % (
                    index, len(bins)))
        parent = -1
        is_left = False
        is_leaf = False
        threshold = bins[index]
        n = tree_add_node(
            tree, parent, is_left, is_leaf, 0, threshold, 0, 1, 1.)
        values.append(UNUSED)
        n_nodes.append(n)
        return n

    def add_nodes(parent, i, j, is_left):
        # add for bins[i:j] (j excluded)
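        # recursive binary split: the median bin becomes the node's
        # threshold and each half becomes a subtree, so a lookup walks
        # O(log n) nodes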
        if is_left:
            # it means j is the parent split
            if i == j:
                # leaf
                n = tree_add_node(tree, parent, is_left, True, 0, 0, 0, 1, 1.)
                n_nodes.append(n)
                values.append(i)
                return n
            if i + 1 == j:
                # split
                values.append(UNUSED)
                th = bins[i]
                n = tree_add_node(tree, parent, is_left,
                                  False, 0, th, 0, 1, 1.)
                n_nodes.append(n)
                add_nodes(n, i, i, True)
                add_nodes(n, i, j, False)
                return n
            if i + 1 < j:
                # split
                values.append(UNUSED)
                index = (i + j) // 2
                th = bins[index]
                n = tree_add_node(tree, parent, is_left,
                                  False, 0, th, 0, 1, 1.)
                n_nodes.append(n)
                add_nodes(n, i, index, True)
                add_nodes(n, index, j, False)
                return n
        else:
            # it means i is the parent split
            if i + 1 == j:
                # leaf
                values.append(j)
                n = tree_add_node(tree, parent, is_left, True, 0, 0, 0, 1, 1.)
                n_nodes.append(n)
                return n
            if i + 1 < j:
                # split
                values.append(UNUSED)
                index = (i + j) // 2
                th = bins[index]
                n = tree_add_node(tree, parent, is_left,
                                  False, 0, th, 0, 1, 1.)
                n_nodes.append(n)
                add_nodes(n, i, index, True)
                add_nodes(n, index, j, False)
                return n
        raise NotImplementedError(  # pragma: no cover
            "Unexpected case where i=%r, j=%r, is_left=%r." % (
                i, j, is_left))

    # the root splits the bins in half; each half is filled in recursively
    index = len(bins) // 2
    add_root(index)
    add_nodes(0, 0, index, True)
    add_nodes(0, index, len(bins), False)

    cl = DecisionTreeRegressor()
    cl.tree_ = tree
    cl.tree_.value[:, 0, 0] = numpy.array(  # pylint: disable=E1137
        values, dtype=numpy.float64)
    cl.n_outputs = 1
    cl.n_outputs_ = 1
    try:
        # scikit-learn >= 0.24
        cl.n_features_in_ = 1
    except AttributeError:
        # scikit-learn < 0.24
        cl.n_features_ = 1
    try:
        # keep the legacy attribute for scikit-learn<=0.23.2; recent
        # versions expose it as a deprecated property and refuse the
        # assignment
        cl.n_features_ = 1
    except AttributeError:
        pass
    return cl
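
A short sanity check one might run, reusing the arrays from the docstring above, to confirm the tree reproduces `numpy.digitize`:

import numpy
from mlinsights.mltree import digitize2tree

x = numpy.array([0.2, 6.4, 3.0, 1.6])
bins = numpy.array([0.0, 1.0, 2.5, 4.0, 7.0])
tree = digitize2tree(bins, right=True)
# the regressor returns floats; compare with numpy's integer indices
assert tree.predict(x.reshape((-1, 1))).tolist() == numpy.digitize(
    x, bins, right=True).tolist()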