def _get_tree_state(self):
    """
    Return the tree state built from ``daal_model_`` (produced by ``fit``).

    The state is obtained from ``d4p.getTreeState`` on first use and kept in
    ``self._cached_tree_state_``; later calls return the stored object.
    """
    check_is_fitted(self, ['daal_model_', '_cached_tree_state_'])

    state = self._cached_tree_state_
    if state is None:
        # First request since the cache was cleared: extract and remember it.
        state = d4p.getTreeState(self.daal_model_, int(self.n_classes_))
        self._cached_tree_state_ = state
    return state
def estimators_(self):
    """Return the fitted forest as a list of ``DecisionTreeClassifier`` objects.

    The list is built from ``self.daal_model_`` on the first call after the
    cache was cleared and is then served from ``self._cached_estimators_``.

    Returns
    -------
    list
        ``self.n_estimators`` cloned ``DecisionTreeClassifier`` instances,
        each with ``tree_`` populated from ``daal4py.getTreeState``.
    """
    cached = getattr(self, '_cached_estimators_', None)
    if cached:
        return cached

    if LooseVersion(sklearn_version) >= LooseVersion("0.22"):
        check_is_fitted(self)
    else:
        check_is_fitted(self, 'daal_model_')

    # Function-scope imports: the Tree construction below needs both names,
    # and they are not otherwise referenced by this block.
    import numpy as np
    from sklearn.tree._tree import Tree

    # convert model to estimators
    est = DecisionTreeClassifier(
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        min_impurity_split=self.min_impurity_split,
        random_state=None)

    # we need to set est.tree_ field with Trees constructed from
    # Intel(R) DAAL solution
    estimators_ = []
    for i in range(self.n_estimators):
        est_i = clone(est)
        est_i.n_features_ = self.n_features_
        est_i.n_outputs_ = self.n_outputs_
        est_i.classes_ = self.classes_
        est_i.n_classes_ = self.n_classes_

        # treeState members: 'class_count', 'leaf_count', 'max_depth',
        # 'node_ar', 'node_count', 'value_ar'
        tree_i_state_class = daal4py.getTreeState(
            self.daal_model_, i, self.n_classes_)
        tree_i_state_dict = {
            'max_depth': tree_i_state_class.max_depth,
            'node_count': tree_i_state_class.node_count,
            'nodes': tree_i_state_class.node_ar,
            'values': tree_i_state_class.value_ar,
        }

        # A cloned (unfitted) estimator has no ``tree_`` attribute, so an
        # empty Tree must be created before its state can be restored.
        est_i.tree_ = Tree(
            self.n_features_,
            np.array([self.n_classes_], dtype=np.intp),
            self.n_outputs_)
        est_i.tree_.__setstate__(tree_i_state_dict)
        estimators_.append(est_i)

    self._cached_estimators_ = estimators_
    return estimators_
def _estimators_(self):
    """Return the fitted forest as a list of scikit-learn regression trees.

    Each tree of ``self.daal_model_`` is converted with
    ``daal4py.getTreeState`` (called without a class count) and loaded into a
    single-"class" ``Tree``, i.e. the regression layout. The estimators are
    therefore ``DecisionTreeRegressor`` instances; a classifier wrapper would
    lack the ``classes_``/``n_classes_`` attributes it needs to predict.

    The result is stored in ``self._cached_estimators_`` so later accesses do
    not rebuild every tree.
    NOTE(review): this assumes ``fit`` resets ``_cached_estimators_`` when the
    model is retrained -- confirm against the fitting code.

    Returns
    -------
    list
        ``self.n_estimators`` ``DecisionTreeRegressor`` instances with
        ``tree_`` populated.
    """
    cached = getattr(self, '_cached_estimators_', None)
    if cached:
        return cached

    if sklearn_check_version('0.22'):
        check_is_fitted(self)
    else:
        check_is_fitted(self, 'daal_model_')

    # Function-scope import: this block did not previously reference the
    # regressor class, so bring it into scope here.
    from sklearn.tree import DecisionTreeRegressor

    # convert model to estimators
    params = {
        'criterion': self.criterion,
        'max_depth': self.max_depth,
        'min_samples_split': self.min_samples_split,
        'min_samples_leaf': self.min_samples_leaf,
        'min_weight_fraction_leaf': self.min_weight_fraction_leaf,
        'max_features': self.max_features,
        'max_leaf_nodes': self.max_leaf_nodes,
        'min_impurity_decrease': self.min_impurity_decrease,
        'random_state': None,
    }
    # ``min_impurity_split`` is only passed for scikit-learn older than 1.0.
    if not sklearn_check_version('1.0'):
        params['min_impurity_split'] = self.min_impurity_split
    est = DecisionTreeRegressor(**params)

    # we need to set est.tree_ field with Trees constructed from Intel(R)
    # oneAPI Data Analytics Library solution
    estimators_ = []
    random_state_checked = check_random_state(self.random_state)
    for i in range(self.n_estimators):
        est_i = clone(est)
        est_i.set_params(random_state=random_state_checked.randint(
            np.iinfo(np.int32).max))
        if sklearn_check_version('1.0'):
            est_i.n_features_in_ = self.n_features_in_
        else:
            est_i.n_features_ = self.n_features_in_
        est_i.n_outputs_ = self.n_outputs_

        tree_i_state_class = daal4py.getTreeState(self.daal_model_, i)
        tree_i_state_dict = {
            'max_depth': tree_i_state_class.max_depth,
            'node_count': tree_i_state_class.node_count,
            'nodes': tree_i_state_class.node_ar,
            'values': tree_i_state_class.value_ar,
        }
        est_i.tree_ = Tree(
            self.n_features_in_,
            np.array([1], dtype=np.intp),
            self.n_outputs_)
        est_i.tree_.__setstate__(tree_i_state_dict)
        estimators_.append(est_i)

    # Populate the cache that the guard at the top of this method reads.
    self._cached_estimators_ = estimators_
    return estimators_
def _estimators_(self):
    """Return the fitted forest as a list of ``DecisionTreeRegressor`` objects.

    Each tree of ``self.daal_model_`` is converted with
    ``daal4py.getTreeState`` and loaded into a scikit-learn ``Tree``. The
    result is stored in ``self._cached_estimators_`` so that later accesses
    return it directly instead of rebuilding every tree.
    NOTE(review): this assumes ``fit`` resets ``_cached_estimators_`` when the
    model is retrained -- confirm against the fitting code.

    Returns
    -------
    list
        ``self.n_estimators`` ``DecisionTreeRegressor`` instances with
        ``tree_`` populated.
    """
    cached = getattr(self, '_cached_estimators_', None)
    if cached:
        return cached

    if LooseVersion(sklearn_version) >= LooseVersion("0.22"):
        check_is_fitted(self)
    else:
        check_is_fitted(self, 'daal_model_')

    # convert model to estimators
    est = DecisionTreeRegressor(
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        min_impurity_split=self.min_impurity_split,
        random_state=None)

    # we need to set est.tree_ field with Trees constructed from Intel(R)
    # oneAPI Data Analytics Library solution
    estimators_ = []
    random_state_checked = check_random_state(self.random_state)
    for i in range(self.n_estimators):
        est_i = clone(est)
        est_i.set_params(random_state=random_state_checked.randint(
            np.iinfo(np.int32).max))
        est_i.n_features_ = self.n_features_
        est_i.n_outputs_ = self.n_outputs_

        tree_i_state_class = daal4py.getTreeState(self.daal_model_, i)
        tree_i_state_dict = {
            'max_depth': tree_i_state_class.max_depth,
            'node_count': tree_i_state_class.node_count,
            'nodes': tree_i_state_class.node_ar,
            'values': tree_i_state_class.value_ar,
        }
        est_i.tree_ = Tree(
            self.n_features_,
            np.array([1], dtype=np.intp),
            self.n_outputs_)
        est_i.tree_.__setstate__(tree_i_state_dict)
        estimators_.append(est_i)

    # Populate the cache that the guard at the top of this method reads.
    self._cached_estimators_ = estimators_
    return estimators_
import math

import daal4py as d4p
from numpy import loadtxt, allclose

from gradient_boosted_regression_batch import main as gbt_regression


def printTree(nodes, values):
    """Print one tree depth-first, starting at node 0.

    Parameters
    ----------
    nodes : indexable
        Per-node records; each is read through the keys ``"threshold"``,
        ``"feature"``, ``"left_child"`` and ``"right_child"``.
    values : indexable
        Per-node values, indexed by the same node ids as ``nodes``.
    """
    def printNodes(node_id, nodes, values, level):
        node = nodes[node_id]
        value = values[node_id]
        # A NaN threshold means the node has no split, so print its value
        # instead of a feature/threshold pair.
        if not math.isnan(node["threshold"]):
            print(" " * level + "Level " + str(level)
                  + ": Feature = " + str(node["feature"])
                  + ", Threshold = " + str(node["threshold"]))
        else:
            print(" " * level + "Level " + str(level)
                  + ", Value = " + str(value).replace(" ", ""))
        # -1 marks a missing child.
        if node["left_child"] != -1:
            printNodes(node["left_child"], nodes, values, level + 1)
        if node["right_child"] != -1:
            printNodes(node["right_child"], nodes, values, level + 1)

    printNodes(0, nodes, values, 0)


if __name__ == "__main__":
    # First get our result and model
    (train_result, _, _) = gbt_regression()
    # Retrieve and print all trees; encoded as in sklearn.tree._tree.Tree
    for treeId in range(train_result.model.NumberOfTrees):
        treeState = d4p.getTreeState(train_result.model, treeId)
        printTree(treeState.node_ar, treeState.value_ar)
    print('Traversed {} trees.'.format(train_result.model.NumberOfTrees))
    print('All looks good!')
def _estimators_(self):
    """Return the fitted forest as a list of ``DecisionTreeClassifier`` objects.

    A non-empty ``self._cached_estimators_`` is returned as is. Otherwise one
    classifier per tree is rebuilt from ``self.daal_model_`` and the list is
    stored in ``self._cached_estimators_`` before being returned.
    """
    cached = getattr(self, '_cached_estimators_', None)
    if cached:
        return cached

    if LooseVersion(sklearn_version) >= LooseVersion("0.22"):
        check_is_fitted(self)
    else:
        check_is_fitted(self, 'daal_model_')

    # Only the first entry of the per-output lists is used.
    classes_ = self.classes_[0]
    n_classes_ = self.n_classes_[0]

    # Constructor arguments shared by every per-tree estimator.
    tree_params = {
        'criterion': self.criterion,
        'max_depth': self.max_depth,
        'min_samples_split': self.min_samples_split,
        'min_samples_leaf': self.min_samples_leaf,
        'min_weight_fraction_leaf': self.min_weight_fraction_leaf,
        'max_features': self.max_features,
        'max_leaf_nodes': self.max_leaf_nodes,
        'min_impurity_decrease': self.min_impurity_decrease,
        'random_state': None,
    }
    sklearn_is_1_0 = sklearn_check_version('1.0')
    if not sklearn_is_1_0:
        tree_params['min_impurity_split'] = self.min_impurity_split
    template = DecisionTreeClassifier(**tree_params)

    # Each estimator's tree_ is filled with a Tree built from the
    # Intel(R) oneAPI Data Analytics Library model.
    rng = check_random_state(self.random_state)
    seed_upper_bound = np.iinfo(np.int32).max
    converted = []
    for tree_idx in range(self.n_estimators):
        tree_est = clone(template)
        tree_est.set_params(random_state=rng.randint(seed_upper_bound))

        if sklearn_is_1_0:
            tree_est.n_features_in_ = self.n_features_in_
        else:
            tree_est.n_features_ = self.n_features_in_
        tree_est.n_outputs_ = self.n_outputs_
        tree_est.classes_ = classes_
        tree_est.n_classes_ = n_classes_

        # State members used here: 'max_depth', 'node_count', 'node_ar',
        # 'value_ar'.
        state = daal4py.getTreeState(self.daal_model_, tree_idx, n_classes_)
        sk_tree = Tree(
            self.n_features_in_,
            np.array([n_classes_], dtype=np.intp),
            self.n_outputs_)
        tree_est.tree_ = sk_tree
        sk_tree.__setstate__({
            'max_depth': state.max_depth,
            'node_count': state.node_count,
            'nodes': state.node_ar,
            'values': state.value_ar,
        })
        converted.append(tree_est)

    self._cached_estimators_ = converted
    return converted
def daal_fit(self, X, y):
    """Train a regression forest with daal4py and expose scikit-learn trees.

    Validates ``X`` and ``y``, runs
    ``daal4py.decision_forest_regression_training``, stores the resulting
    model in ``self.daal_model_`` and converts every tree into a
    ``DecisionTreeRegressor`` collected in ``self.estimators_``.

    Raises
    ------
    ValueError
        If ``oob_score`` is requested while ``bootstrap`` is disabled.

    Returns
    -------
    self
    """
    self._check_daal_supported_parameters()

    X = check_array(X, dtype=[np.double, np.single])
    y = np.atleast_1d(y)

    if y.ndim == 2 and y.shape[1] == 1:
        warnings.warn(
            "A column-vector y was passed when a 1d array was expected. "
            "Please change the shape of y to (n_samples,), "
            "for example using ravel().",
            DataConversionWarning, stacklevel=2)

    y = check_array(y, ensure_2d=False, dtype=X.dtype)
    check_consistent_length(X, y)

    if y.ndim == 1:
        # reshape is necessary to preserve the data contiguity against vs
        # [:, np.newaxis] that does not.
        y = np.reshape(y, (-1, 1))

    self.n_outputs_ = y.shape[1]
    self.n_features_ = X.shape[1]
    rng = check_random_state(self.random_state)

    if not self.bootstrap and self.oob_score:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    fptype = getFPType(X)
    seed = rng.randint(0, np.iinfo('i').max)
    engine = daal4py.engines_mt2203(seed=seed, fptype=fptype)
    features_per_node = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=False)

    # daal4py receives 0 / 0.0 where the estimator parameter is None.
    max_tree_depth = 0 if self.max_depth is None else self.max_depth
    impurity_threshold = (0.0 if self.min_impurity_split is None
                          else self.min_impurity_split)

    # create algorithm
    trainer = daal4py.decision_forest_regression_training(
        fptype=fptype,
        method='defaultDense',
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=1,
        featuresPerNode=int(features_per_node),
        maxTreeDepth=int(max_tree_depth),
        minObservationsInLeafNode=1,
        engine=engine,
        impurityThreshold=float(impurity_threshold),
        varImportance="MDI",
        resultsToCompute="",
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap))

    # get resulting model
    model = trainer.compute(X, y).model
    self.daal_model_ = model

    # convert model to estimators
    template = DecisionTreeRegressor(
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        min_impurity_split=self.min_impurity_split,
        random_state=None)

    # Each estimator's tree_ is filled with a Tree built from the
    # Intel(R) DAAL model.
    converted = []
    for tree_idx in range(self.n_estimators):
        tree_est = clone(template)
        tree_est.n_features_ = self.n_features_
        tree_est.n_outputs_ = self.n_outputs_

        state = daal4py.getTreeState(model, tree_idx)
        sk_tree = Tree(
            self.n_features_,
            np.array([1], dtype=np.intp),
            self.n_outputs_)
        tree_est.tree_ = sk_tree
        sk_tree.__setstate__({
            'max_depth': state.max_depth,
            'node_count': state.node_count,
            'nodes': state.node_ar,
            'values': state.value_ar,
        })
        converted.append(tree_est)

    self.estimators_ = converted

    # compute oob_score_
    if self.oob_score:
        self._set_oob_score(X, y)

    return self
def daal_fit(self, X, y):
    """Train a classification forest with daal4py and expose sklearn trees.

    Validates ``X`` and ``y``, runs
    ``daal4py.decision_forest_classification_training``, stores the resulting
    model in ``self.daal_model_`` and converts every tree into a
    ``DecisionTreeClassifier`` collected in ``self.estimators_``.

    Raises
    ------
    ValueError
        If ``y`` has more than one output column, if ``oob_score`` is
        requested while ``bootstrap`` is disabled, or if fewer than two
        classes are present in ``y``.

    Returns
    -------
    self
    """
    self._check_daal_supported_parameters()
    _supported_dtypes_ = [np.single, np.double]
    X = check_array(X, dtype=_supported_dtypes_)
    y = np.atleast_1d(y)

    if y.ndim == 2 and y.shape[1] == 1:
        warnings.warn(
            "A column-vector y was passed when a 1d array was"
            " expected. Please change the shape of y to "
            "(n_samples,), for example using ravel().",
            DataConversionWarning, stacklevel=2)

    check_consistent_length(X, y)

    if y.ndim == 1:
        # reshape is necessary to preserve the data contiguity against vs
        # [:, np.newaxis] that does not.
        y = np.reshape(y, (-1, 1))

    self.n_outputs_ = y.shape[1]
    if self.n_outputs_ != 1:
        _class_name = self.__class__.__name__
        raise ValueError(
            _class_name
            + " does not currently support multi-output data. Consider using OneHotEncoder"
        )

    y = check_array(y, ensure_2d=False, dtype=None)
    y, _ = self._validate_y_class_weight(y)
    # Keep only the first entry of the per-output lists; a single output is
    # guaranteed by the check above.
    self.n_classes_ = self.n_classes_[0]
    self.classes_ = self.classes_[0]

    self.n_features_ = X.shape[1]

    rs_ = check_random_state(self.random_state)

    # Same precondition as the regression ``daal_fit``.
    if not self.bootstrap and self.oob_score:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    seed_ = rs_.randint(0, np.iinfo('i').max)

    if self.n_classes_ < 2:
        raise ValueError(
            "Training data only contain information about one class.")

    # create algorithm
    X_fptype = getFPType(X)
    daal_engine_ = daal4py.engines_mt2203(seed=seed_, fptype=X_fptype)
    # This is the classification path, so resolve ``max_features`` with the
    # classification rules.
    _featuresPerNode = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=True)

    dfc_algorithm = daal4py.decision_forest_classification_training(
        nClasses=int(self.n_classes_),
        fptype=X_fptype,
        method='defaultDense',
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=1,
        featuresPerNode=int(_featuresPerNode),
        maxTreeDepth=int(0 if self.max_depth is None else self.max_depth),
        minObservationsInLeafNode=1,
        engine=daal_engine_,
        impurityThreshold=float(
            0.0 if self.min_impurity_split is None
            else self.min_impurity_split),
        varImportance="MDI",
        resultsToCompute="",
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap))

    # compute
    dfc_trainingResult = dfc_algorithm.compute(X, y)

    # get resulting model
    model = dfc_trainingResult.model
    self.daal_model_ = model

    # Function-scope import: the Tree construction below needs this name.
    from sklearn.tree._tree import Tree

    # convert model to estimators
    est = DecisionTreeClassifier(
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        min_impurity_split=self.min_impurity_split,
        random_state=None)

    # we need to set est.tree_ field with Trees constructed from
    # Intel(R) DAAL solution
    estimators_ = []
    for i in range(self.n_estimators):
        est_i = clone(est)
        est_i.n_features_ = self.n_features_
        est_i.n_outputs_ = self.n_outputs_
        est_i.classes_ = self.classes_
        est_i.n_classes_ = self.n_classes_

        # treeState members: 'class_count', 'leaf_count', 'max_depth',
        # 'node_ar', 'node_count', 'value_ar'
        tree_i_state_class = daal4py.getTreeState(model, i, self.n_classes_)
        tree_i_state_dict = {
            'max_depth': tree_i_state_class.max_depth,
            'node_count': tree_i_state_class.node_count,
            'nodes': tree_i_state_class.node_ar,
            'values': tree_i_state_class.value_ar,
        }

        # A cloned (unfitted) estimator has no ``tree_`` attribute, so an
        # empty Tree must be created before its state can be restored.
        est_i.tree_ = Tree(
            self.n_features_,
            np.array([self.n_classes_], dtype=np.intp),
            self.n_outputs_)
        est_i.tree_.__setstate__(tree_i_state_dict)
        estimators_.append(est_i)

    self.estimators_ = estimators_

    # compute oob_score_
    if self.oob_score:
        self._set_oob_score(X, y)

    return self