def get_tree_objects(self, tree_models, fields, classes):
    """Rebuild scikit-learn tree estimators from parsed tree-model objects.

    Each entry of ``tree_models`` is either a single model object or a
    ``list`` of model objects.  A model must expose ``get_Node()`` returning
    the main node, whose own ``get_Node()`` returns the child nodes.

    * A single model becomes a ``DecisionTreeClassifier`` whose ``tree_`` is
      a ``Tree(fields, classes, operator)``; it is appended directly to the
      result.  Its ``_estimator_type`` is ``'classifier'`` when ``classes``
      is non-empty, ``'regressor'`` otherwise.
    * A list of models becomes a list of ``DecisionTreeRegressor`` objects
      whose ``tree_`` is a ``Tree(fields, [1], operator)``; that inner list
      is appended to the result (even when it ends up empty).

    Models whose main node has no child nodes are skipped.

    :param tree_models: iterable of model objects and/or lists of them
    :param fields: sequence of feature fields; its length is used as the
        number of features of every estimator
    :param classes: sequence of class labels, stored as ``classes_`` on
        every estimator
    :return: list containing estimators and/or lists of estimators, in the
        order of ``tree_models``
    """
    n_fields = len(fields)

    def _build_estimator(tree_mod, as_classifier):
        # Returns a populated estimator, or None when the model has no nodes.
        child_nodes = tree_mod.get_Node().get_Node()
        if len(child_nodes) == 0:
            return None
        # The comparison operator of the first child's predicate is handed
        # to the Tree constructor.
        operator = child_nodes[0].get_SimplePredicate().get_operator()
        # Stand-alone trees use the real class labels; trees belonging to a
        # list use the placeholder ``[1]``.
        tree = Tree(fields, classes if as_classifier else [1], operator)
        tree.get_node_info(child_nodes)
        tree.build_tree()
        if as_classifier:
            model = DecisionTreeClassifier()
        else:
            model = DecisionTreeRegressor()
        model.n_features = n_fields
        model.n_features_ = n_fields
        model.n_outputs_ = 1
        model.n_outputs = 1
        model.classes_ = np.array(classes)
        if as_classifier:
            model._estimator_type = (
                'classifier' if len(classes) > 0 else 'regressor')
        model.tree_ = tree
        return model

    trees = []
    for tree_model in tree_models:
        # isinstance() instead of matching "list" in the type's repr: the
        # string test misfires on unrelated types whose name contains "list"
        # and misses list subclasses.
        if isinstance(tree_model, list):
            built = (_build_estimator(tree_mod, False)
                     for tree_mod in tree_model)
            trees.append([model for model in built if model is not None])
        else:
            model = _build_estimator(tree_model, True)
            if model is not None:
                trees.append(model)
    return trees
def digitize2tree(bins, right=False):
    """
    Builds a decision tree which returns the same result as
    `lambda x: numpy.digitize(x, bins, right=right)`
    (see :epkg:`numpy:digitize`).

    :param bins: array of bins. It has to be 1-dimensional and monotonic.
    :param right: Indicating whether the intervals include the right
        or the left bin edge. Default behavior is (right==False)
        indicating that the interval does not include the right edge.
        The left bin end is open in this case, i.e.,
        `bins[i-1] <= x < bins[i]` is the default behavior for
        monotonically increasing bins.
    :return: decision tree

    .. note::
        The implementation of decision trees in :epkg:`scikit-learn`
        only allows one type of decision (`<=`). That's why the
        function throws an exception when `right=False`. However,
        this could be overcome by using :epkg:`ONNX` where all
        kind of decision rules are implemented. Default value for
        right is still *False* to follow *numpy* API even though
        this value raises an exception in *digitize2tree*.

    The following example shows what the tree looks like.

    .. runpython::
        :showcode:

        import numpy
        from sklearn.tree import export_text
        from mlinsights.mltree import digitize2tree

        x = numpy.array([0.2, 6.4, 3.0, 1.6])
        bins = numpy.array([0.0, 1.0, 2.5, 4.0, 7.0])
        expected = numpy.digitize(x, bins, right=True)
        tree = digitize2tree(bins, right=True)
        pred = tree.predict(x.reshape((-1, 1)))
        print("Comparison with numpy:")
        print(expected, pred)
        print("Tree:")
        print(export_text(tree, feature_names=['x']))

    See also example :ref:`l-example-digitize`.

    .. versionadded:: 0.4
    """
    if not right:
        raise RuntimeError(
            "right must be True not right=%r" % right)

    is_ascending = len(bins) <= 1 or bins[0] < bins[1]
    if not is_ascending:
        # Descending bins: build the tree for the reversed (ascending) bins,
        # then mirror every stored value k into len(bins) - k.
        mirrored = digitize2tree(bins[::-1], right=right)
        total = len(bins)
        mirrored.tree_.value[:, 0, 0] = total - mirrored.tree_.value[:, 0, 0]
        return mirrored

    tree = Tree(1, numpy.array([1], dtype=numpy.intp), 1)
    UNUSED = numpy.nan  # value recorded for split (non-leaf) nodes
    values = []  # one entry per node, in creation order
    n_nodes = []  # node ids returned by tree_add_node

    def grow(parent, lo, hi, is_left):
        # Adds the subtree for bins[lo:hi] (hi excluded) below *parent*.
        # For a left child, hi is the parent split;
        # for a right child, lo is the parent split.
        if is_left and lo == hi:
            # left leaf
            node = tree_add_node(
                tree, parent, is_left, True, 0, 0, 0, 1, 1.)
            n_nodes.append(node)
            values.append(lo)
            return node
        if not is_left and lo + 1 == hi:
            # right leaf
            values.append(hi)
            node = tree_add_node(
                tree, parent, is_left, True, 0, 0, 0, 1, 1.)
            n_nodes.append(node)
            return node
        if lo < hi:
            # split in the middle; when lo + 1 == hi the middle is lo itself
            values.append(UNUSED)
            middle = (lo + hi) // 2
            node = tree_add_node(
                tree, parent, is_left, False, 0, bins[middle], 0, 1, 1.)
            n_nodes.append(node)
            grow(node, lo, middle, True)
            grow(node, middle, hi, False)
            return node
        raise NotImplementedError(  # pragma: no cover
            "Unexpected case where i=%r, j=%r, is_left=%r." % (
                lo, hi, is_left))

    # The root splits on the middle bin.
    root_index = len(bins) // 2
    if not 0 <= root_index < len(bins):
        raise IndexError(  # pragma: no cover
            "Unexpected index %d / len(bins)=%d." % (
                root_index, len(bins)))
    root_threshold = bins[root_index]
    root = tree_add_node(
        tree, -1, False, False, 0, root_threshold, 0, 1, 1.)
    values.append(UNUSED)
    n_nodes.append(root)

    grow(0, 0, root_index, True)
    grow(0, root_index, len(bins), False)

    regressor = DecisionTreeRegressor()
    regressor.tree_ = tree
    regressor.tree_.value[:, 0, 0] = numpy.array(  # pylint: disable=E1137
        values, dtype=numpy.float64)
    regressor.n_outputs = 1
    regressor.n_outputs_ = 1
    try:
        # scikit-learn >= 0.24
        regressor.n_features_in_ = 1
    except AttributeError:
        # scikit-learn < 0.24
        regressor.n_features_ = 1
    try:
        # for scikit-learn<=0.23.2
        regressor.n_features_ = 1
    except AttributeError:
        pass
    return regressor