def main(readcsv=read_csv, method='defaultDense'):
    """Train a decision forest regressor on the sample data and predict.

    Parameters
    ----------
    readcsv : callable
        Loader invoked as ``readcsv(path, columns, t=dtype)``.
    method : str
        Computation method forwarded to the training algorithm.

    Returns
    -------
    tuple
        ``(train_result, predict_result, ptdata)`` where ``ptdata`` is the
        dependent-variable column read from the test file.
    """
    infile = "./data/batch/df_regression_train.csv"
    testfile = "./data/batch/df_regression_test.csv"

    # Configure a decision forest regression training object.
    # `method` is forwarded so the caller's choice is actually honoured.
    train_algo = d4p.decision_forest_regression_training(
        nTrees=100,
        varImportance='MDA_Raw',
        bootstrap=True,
        engine=d4p.engines_mt2203(seed=777),
        resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation',
        method=method,
    )

    # Read data: columns 0..12 are the independent variables and column 13
    # is the dependent variable of each observation.
    indep_data = readcsv(infile, range(13), t=np.float32)
    dep_data = readcsv(infile, range(13, 14), t=np.float32)

    # Train; the result carries the model used for prediction and, depending
    # on the parameters, outOfBagError, outOfBagErrorPerObservation and/or
    # variableImportance.
    train_result = train_algo.compute(indep_data, dep_data)

    # Read the test data (same number of features as the training data).
    predict_algo = d4p.decision_forest_regression_prediction()
    pdata = readcsv(testfile, range(13), t=np.float32)
    ptdata = readcsv(testfile, range(13, 14), t=np.float32)

    # Predict using the model obtained from training above.
    predict_result = predict_algo.compute(pdata, train_result.model)

    # Sanity check: one prediction row per test observation, one column per
    # dependent variable.
    assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1])

    return (train_result, predict_result, ptdata)
def df_clsf_fit(X, y, n_classes, n_trees=100, seed=12345,
                n_features_per_node=0, max_depth=0, min_impurity=0,
                bootstrap=True, verbose=False):
    """Train a decision forest classifier on ``X``/``y`` and return the result.

    ``n_features_per_node`` is used only when it is strictly between 0 and
    the number of columns of ``X``; otherwise every column is considered.
    ``verbose`` is accepted but not used by this function.
    """
    fptype = getFPType(X)

    total_features = X.shape[1]
    if 0 < n_features_per_node < total_features:
        node_features = n_features_per_node
    else:
        node_features = total_features

    training_kwargs = dict(
        nClasses=n_classes,
        fptype=fptype,
        method='defaultDense',
        nTrees=n_trees,
        observationsPerTreeFraction=1.,
        featuresPerNode=node_features,
        maxTreeDepth=max_depth,
        minObservationsInLeafNode=1,
        engine=engines_mt2203(seed=seed, fptype=fptype),
        impurityThreshold=min_impurity,
        varImportance='MDI',
        resultsToCompute='',
        memorySavingMode=False,
        bootstrap=bootstrap,
    )
    trainer = decision_forest_classification_training(**training_kwargs)
    return trainer.compute(X, y)
def _daal_fit_regressor(self, X, y, sample_weight=None):
    """Fit the regression forest through daal4py and store the model.

    Sets ``n_features_``, ``daal_model_`` and resets ``_cached_estimators_``.
    When ``self.oob_score`` is truthy, ``estimators_`` is populated and
    ``_set_oob_score`` is invoked.

    Raises
    ------
    ValueError
        If ``oob_score`` is requested while ``bootstrap`` is disabled.

    Returns
    -------
    self
    """
    self.n_features_ = X.shape[1]
    random_state = check_random_state(self.random_state)

    if self.oob_score and not self.bootstrap:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    X_fptype = getFPType(X)
    seed = random_state.randint(0, np.iinfo('i').max)
    engine = daal4py.engines_mt2203(seed=seed, fptype=X_fptype)

    features_per_node = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=False)
    bootstrap_fraction = _get_n_samples_bootstrap(
        n_samples=X.shape[0], max_samples=self.max_samples)

    # the weights are handed to compute() wrapped in a list
    if sample_weight is not None:
        sample_weight = [sample_weight]

    # Integral settings are absolute counts; anything else is scaled by the
    # number of rows in X.
    if isinstance(self.min_samples_leaf, numbers.Integral):
        min_leaf = self.min_samples_leaf
    else:
        min_leaf = int(ceil(self.min_samples_leaf * X.shape[0]))

    if isinstance(self.min_samples_split, numbers.Integral):
        min_split = self.min_samples_split
    else:
        min_split = int(ceil(self.min_samples_split * X.shape[0]))

    depth = 0 if self.max_depth is None else self.max_depth
    impurity = (0.0 if self.min_impurity_split is None
                else self.min_impurity_split)
    leaf_nodes = 0 if self.max_leaf_nodes is None else self.max_leaf_nodes
    tree_fraction = bootstrap_fraction if self.bootstrap is True else 1.

    trainer = daal4py.decision_forest_regression_training(
        fptype=getFPType(X),
        method='defaultDense',
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=tree_fraction,
        featuresPerNode=int(features_per_node),
        maxTreeDepth=int(depth),
        minObservationsInLeafNode=min_leaf,
        engine=engine,
        impurityThreshold=float(impurity),
        varImportance="MDI",
        resultsToCompute="",
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap),
        minObservationsInSplitNode=min_split,
        minWeightFractionInLeafNode=self.min_weight_fraction_leaf,
        minImpurityDecreaseInSplitNode=self.min_impurity_decrease,
        maxLeafNodes=leaf_nodes,
    )

    self._cached_estimators_ = None

    training_result = trainer.compute(X, y, sample_weight)

    # keep the trained model on the estimator
    self.daal_model_ = training_result.model

    # out-of-bag score, if requested
    if self.oob_score:
        self.estimators_ = self._estimators_
        self._set_oob_score(X, y)

    return self
def compute(train_data, train_labels, predict_data, method='defaultDense'):
    """Train a 100-tree regression forest and predict on ``predict_data``.

    Returns the pair ``(train_result, predict_result)``.
    """
    # Requested extra results: the out-of-bag error, overall and per
    # observation.
    requested_results = 'computeOutOfBagError|computeOutOfBagErrorPerObservation'

    trainer = d4p.decision_forest_regression_training(
        nTrees=100,
        engine=d4p.engines_mt2203(seed=777),
        varImportance='MDA_Raw',
        bootstrap=True,
        resultsToCompute=requested_results,
        method=method,
    )

    # Depending on the parameters the training result provides the model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance.
    train_result = trainer.compute(train_data, train_labels)

    # Predict with the model produced by the training step.
    predictor = d4p.decision_forest_regression_prediction()
    predict_result = predictor.compute(predict_data, train_result.model)

    return train_result, predict_result
def compute(train_data, train_labels, predict_data):
    """Train a regression forest with the 'hist' method and predict.

    Training and prediction both use ``fptype='float'``. Returns the pair
    ``(train_result, predict_result)``.
    """
    training_kwargs = {
        'method': 'hist',
        'maxBins': 256,
        'minBinSize': 1,
        'nTrees': 100,
        'fptype': 'float',
        'varImportance': 'MDA_Raw',
        'bootstrap': True,
        'engine': d4p.engines_mt2203(seed=777),
        'resultsToCompute':
            'computeOutOfBagError|computeOutOfBagErrorPerObservation',
    }
    trainer = d4p.decision_forest_regression_training(**training_kwargs)

    # Depending on the parameters the training result provides the model,
    # outOfBagError, outOfBagErrorPerObservation and/or variableImportance.
    train_result = trainer.compute(train_data, train_labels)

    # Predict with the model produced by the training step.
    predictor = d4p.decision_forest_regression_prediction(fptype='float')
    predict_result = predictor.compute(predict_data, train_result.model)

    return train_result, predict_result
def _daal_fit(self, X, y):
    """Validate ``X``/``y`` and fit the regression forest through daal4py.

    Sets ``n_outputs_``, ``n_features_`` and ``daal_model_``, resets
    ``_cached_estimators_`` and, when ``self.oob_score`` is truthy, calls
    ``_set_oob_score``.

    Raises
    ------
    ValueError
        If ``oob_score`` is requested while ``bootstrap`` is disabled.

    Returns
    -------
    self
    """
    self._check_daal_supported_parameters()

    X = check_array(X, dtype=[np.double, np.single])
    y = np.atleast_1d(np.asarray(y))

    is_column_vector = y.ndim == 2 and y.shape[1] == 1
    if is_column_vector:
        warnings.warn("A column-vector y was passed when a 1d array was"
                      " expected. Please change the shape of y to "
                      "(n_samples,), for example using ravel().",
                      DataConversionWarning, stacklevel=2)

    y = check_array(y, ensure_2d=False, dtype=X.dtype)
    check_consistent_length(X, y)

    if y.ndim == 1:
        # np.reshape keeps the data contiguous, unlike [:, np.newaxis]
        y = np.reshape(y, (-1, 1))

    self.n_outputs_ = y.shape[1]
    self.n_features_ = X.shape[1]

    random_state = check_random_state(self.random_state)

    if self.oob_score and not self.bootstrap:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    X_fptype = getFPType(X)
    seed = random_state.randint(0, np.iinfo('i').max)
    engine = daal4py.engines_mt2203(seed=seed, fptype=X_fptype)

    features_per_node = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=False)

    depth = 0 if self.max_depth is None else self.max_depth
    impurity = (0.0 if self.min_impurity_split is None
                else self.min_impurity_split)

    trainer = daal4py.decision_forest_regression_training(
        fptype=getFPType(X),
        method='defaultDense',
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=1,
        featuresPerNode=int(features_per_node),
        maxTreeDepth=int(depth),
        minObservationsInLeafNode=1,
        engine=engine,
        impurityThreshold=float(impurity),
        varImportance="MDI",
        resultsToCompute="",
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap),
    )

    self._cached_estimators_ = None

    training_result = trainer.compute(X, y)

    # keep the trained model on the estimator
    self.daal_model_ = training_result.model

    # out-of-bag score, if requested
    if self.oob_score:
        self._set_oob_score(X, y)

    return self
def _daal_fit(self, X, y):
    """Validate ``X``/``y`` and fit the classification forest via daal4py.

    Sets ``n_outputs_``, ``n_classes_``, ``classes_``, ``n_features_`` and
    ``daal_model_``, resets ``_cached_estimators_`` and, when
    ``self.oob_score`` is truthy, calls ``_set_oob_score``.

    Raises
    ------
    ValueError
        If ``y`` has more than one output column, or if fewer than two
        classes are present.

    Returns
    -------
    self
    """
    self._check_daal_supported_parameters()

    X = check_array(X, dtype=[np.single, np.double])
    y = np.atleast_1d(np.asarray(y))

    is_column_vector = y.ndim == 2 and y.shape[1] == 1
    if is_column_vector:
        warnings.warn("A column-vector y was passed when a 1d array was"
                      " expected. Please change the shape of y to "
                      "(n_samples,), for example using ravel().",
                      DataConversionWarning, stacklevel=2)

    check_consistent_length(X, y)

    if y.ndim == 1:
        # np.reshape keeps the data contiguous, unlike [:, np.newaxis]
        y = np.reshape(y, (-1, 1))

    self.n_outputs_ = y.shape[1]
    if self.n_outputs_ != 1:
        raise ValueError(
            self.__class__.__name__
            + " does not currently support multi-output data. Consider using OneHotEncoder")

    y = check_array(y, ensure_2d=False, dtype=None)
    y, _ = self._validate_y_class_weight(y)

    # keep only the first entry of the per-output class attributes, since a
    # single output is enforced above
    self.n_classes_ = self.n_classes_[0]
    self.classes_ = self.classes_[0]

    self.n_features_ = X.shape[1]

    random_state = check_random_state(self.random_state)
    seed = random_state.randint(0, np.iinfo('i').max)

    if self.n_classes_ < 2:
        raise ValueError("Training data only contain information about one class.")

    X_fptype = getFPType(X)
    engine = daal4py.engines_mt2203(seed=seed, fptype=X_fptype)
    features_per_node = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=True)

    depth = 0 if self.max_depth is None else self.max_depth
    impurity = (0.0 if self.min_impurity_split is None
                else self.min_impurity_split)

    trainer = daal4py.decision_forest_classification_training(
        nClasses=int(self.n_classes_),
        fptype=X_fptype,
        method='defaultDense',
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=1,
        featuresPerNode=int(features_per_node),
        maxTreeDepth=int(depth),
        minObservationsInLeafNode=int(self.min_samples_leaf),
        engine=engine,
        impurityThreshold=float(impurity),
        varImportance="MDI",
        resultsToCompute="",
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap),
    )

    self._cached_estimators_ = None

    training_result = trainer.compute(X, y)

    # keep the trained model on the estimator
    self.daal_model_ = training_result.model

    # out-of-bag score, if requested
    if self.oob_score:
        self._set_oob_score(X, y)

    return self
def _daal_fit_regressor(self, X, y, sample_weight=None):
    """Fit the regression forest through daal4py and store the model.

    Sets ``n_features_in_`` (and ``n_features_`` when
    ``sklearn_check_version('1.0')`` is falsy), ``daal_model_`` and resets
    ``_cached_estimators_``.

    Raises
    ------
    ValueError
        If ``oob_score`` is requested while ``bootstrap`` is disabled.

    Returns
    -------
    self
    """
    self.n_features_in_ = X.shape[1]
    if not sklearn_check_version('1.0'):
        self.n_features_ = self.n_features_in_

    random_state = check_random_state(self.random_state)

    if self.oob_score and not self.bootstrap:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    X_fptype = getFPType(X)
    seed = random_state.randint(0, np.iinfo('i').max)

    # mt2203 is limited to 6024 streams, so larger forests fall back to
    # mt19937. More details:
    # https://oneapi-src.github.io/oneDAL/daal/algorithms/engines/mt2203.html
    max_stream_count = 6024
    if self.n_estimators <= max_stream_count:
        make_engine = daal4py.engines_mt2203
    else:
        make_engine = daal4py.engines_mt19937
    engine = make_engine(seed=seed, fptype=X_fptype)

    features_per_node = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=False)
    bootstrap_fraction = _get_n_samples_bootstrap(
        n_samples=X.shape[0], max_samples=self.max_samples)

    # the weights are handed to compute() wrapped in a list
    if sample_weight is not None:
        sample_weight = [sample_weight]

    if daal_check_version((2021, 'P', 200)):
        training_method = 'hist'
    else:
        training_method = 'defaultDense'

    # Integral settings are absolute counts; anything else is scaled by the
    # number of rows in X.
    if isinstance(self.min_samples_leaf, numbers.Integral):
        min_leaf = self.min_samples_leaf
    else:
        min_leaf = int(ceil(self.min_samples_leaf * X.shape[0]))

    if isinstance(self.min_samples_split, numbers.Integral):
        min_split = self.min_samples_split
    else:
        min_split = int(ceil(self.min_samples_split * X.shape[0]))

    depth = 0 if self.max_depth is None else self.max_depth
    impurity = (0.0 if self.min_impurity_split is None
                else self.min_impurity_split)
    leaf_nodes = 0 if self.max_leaf_nodes is None else self.max_leaf_nodes
    tree_fraction = bootstrap_fraction if self.bootstrap is True else 1.

    trainer = daal4py.decision_forest_regression_training(
        fptype=getFPType(X),
        method=training_method,
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=tree_fraction,
        featuresPerNode=int(features_per_node),
        maxTreeDepth=int(depth),
        minObservationsInLeafNode=min_leaf,
        engine=engine,
        impurityThreshold=float(impurity),
        varImportance="MDI",
        resultsToCompute="",
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap),
        minObservationsInSplitNode=min_split,
        minWeightFractionInLeafNode=self.min_weight_fraction_leaf,
        minImpurityDecreaseInSplitNode=self.min_impurity_decrease,
        maxLeafNodes=leaf_nodes,
        maxBins=self.maxBins,
        minBinSize=self.minBinSize,
    )

    self._cached_estimators_ = None

    training_result = trainer.compute(X, y, sample_weight)

    # keep the trained model on the estimator
    self.daal_model_ = training_result.model

    # NOTE(review): no out-of-bag score is computed in this method even when
    # self.oob_score is set; only the bootstrap/oob_score combination is
    # validated above. Confirm whether the score is produced elsewhere.

    return self
def _daal_fit_classifier(self, X, y, sample_weight=None):
    """Fit the classification forest through daal4py and store the model.

    Class weights returned by ``_validate_y_class_weight`` are folded into
    ``sample_weight`` before training. Sets ``n_features_``, ``daal_model_``
    and resets ``_cached_estimators_``; when ``self.oob_score`` is truthy,
    ``estimators_`` is populated and ``_set_oob_score`` is invoked.

    Raises
    ------
    ValueError
        If fewer than two classes are present, or if ``oob_score`` is
        requested while ``bootstrap`` is disabled.

    Returns
    -------
    self
    """
    y = check_array(y, ensure_2d=False, dtype=None)
    y, expanded_class_weight = self._validate_y_class_weight(y)
    n_classes = self.n_classes_[0]
    self.n_features_ = X.shape[1]

    # merge the class weights into the per-sample weights
    if expanded_class_weight is not None:
        sample_weight = (expanded_class_weight if sample_weight is None
                         else sample_weight * expanded_class_weight)

    # the weights are handed to compute() wrapped in a list
    if sample_weight is not None:
        sample_weight = [sample_weight]

    random_state = check_random_state(self.random_state)
    seed = random_state.randint(0, np.iinfo('i').max)

    if n_classes < 2:
        raise ValueError(
            "Training data only contain information about one class.")

    X_fptype = getFPType(X)
    engine = daal4py.engines_mt2203(seed=seed, fptype=X_fptype)
    features_per_node = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=True)
    bootstrap_fraction = _get_n_samples_bootstrap(
        n_samples=X.shape[0], max_samples=self.max_samples)

    if self.oob_score and not self.bootstrap:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    if daal_check_version((2021, 'P', 200)):
        training_method = 'hist'
    else:
        training_method = 'defaultDense'

    # Integral settings are absolute counts; anything else is scaled by the
    # number of rows in X.
    if isinstance(self.min_samples_leaf, numbers.Integral):
        min_leaf = self.min_samples_leaf
    else:
        min_leaf = int(ceil(self.min_samples_leaf * X.shape[0]))

    if isinstance(self.min_samples_split, numbers.Integral):
        min_split = self.min_samples_split
    else:
        min_split = int(ceil(self.min_samples_split * X.shape[0]))

    depth = 0 if self.max_depth is None else self.max_depth
    impurity = (0.0 if self.min_impurity_split is None
                else self.min_impurity_split)
    leaf_nodes = 0 if self.max_leaf_nodes is None else self.max_leaf_nodes
    tree_fraction = bootstrap_fraction if self.bootstrap is True else 1.

    trainer = daal4py.decision_forest_classification_training(
        nClasses=int(n_classes),
        fptype=X_fptype,
        method=training_method,
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=tree_fraction,
        featuresPerNode=int(features_per_node),
        maxTreeDepth=int(depth),
        minObservationsInLeafNode=min_leaf,
        engine=engine,
        impurityThreshold=float(impurity),
        varImportance="MDI",
        resultsToCompute="",
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap),
        minObservationsInSplitNode=min_split,
        minWeightFractionInLeafNode=self.min_weight_fraction_leaf,
        minImpurityDecreaseInSplitNode=self.min_impurity_decrease,
        maxLeafNodes=leaf_nodes,
        maxBins=self.maxBins,
        minBinSize=self.minBinSize,
    )

    self._cached_estimators_ = None

    training_result = trainer.compute(X, y, sample_weight)

    # keep the trained model on the estimator
    self.daal_model_ = training_result.model

    # out-of-bag score, if requested
    if self.oob_score:
        self.estimators_ = self._estimators_
        self._set_oob_score(X, y)

    return self
def _daal_fit_classifier(self, X, y, sample_weight=None):
    """Fit the classification forest through daal4py and store the model.

    Class weights returned by ``_validate_y_class_weight`` are folded into
    ``sample_weight`` before training. Sets ``n_features_in_`` (and
    ``n_features_`` when ``sklearn_check_version('1.0')`` is falsy),
    ``daal_model_`` and resets ``_cached_estimators_``. When
    ``self.oob_score`` is truthy, ``oob_score_`` and
    ``oob_decision_function_`` are read from the training result.

    Raises
    ------
    ValueError
        If fewer than two classes are present, or if ``oob_score`` is
        requested while ``bootstrap`` is disabled.

    Returns
    -------
    self
    """
    y = check_array(y, ensure_2d=False, dtype=None)
    y, expanded_class_weight = self._validate_y_class_weight(y)
    n_classes = self.n_classes_[0]

    self.n_features_in_ = X.shape[1]
    if not sklearn_check_version('1.0'):
        self.n_features_ = self.n_features_in_

    # merge the class weights into the per-sample weights
    if expanded_class_weight is not None:
        sample_weight = (expanded_class_weight if sample_weight is None
                         else sample_weight * expanded_class_weight)

    # the weights are handed to compute() wrapped in a list
    if sample_weight is not None:
        sample_weight = [sample_weight]

    random_state = check_random_state(self.random_state)
    seed = random_state.randint(0, np.iinfo('i').max)

    if n_classes < 2:
        raise ValueError(
            "Training data only contain information about one class.")

    X_fptype = getFPType(X)

    # mt2203 is limited to 6024 streams, so larger forests fall back to
    # mt19937. More details:
    # https://oneapi-src.github.io/oneDAL/daal/algorithms/engines/mt2203.html
    max_stream_count = 6024
    if self.n_estimators <= max_stream_count:
        make_engine = daal4py.engines_mt2203
    else:
        make_engine = daal4py.engines_mt19937
    engine = make_engine(seed=seed, fptype=X_fptype)

    features_per_node = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=True)
    bootstrap_fraction = _get_n_samples_bootstrap(
        n_samples=X.shape[0], max_samples=self.max_samples)

    if self.oob_score and not self.bootstrap:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    # Integral settings are absolute counts; anything else is scaled by the
    # number of rows in X.
    if isinstance(self.min_samples_leaf, numbers.Integral):
        min_leaf = self.min_samples_leaf
    else:
        min_leaf = int(ceil(self.min_samples_leaf * X.shape[0]))

    if isinstance(self.min_samples_split, numbers.Integral):
        min_split = self.min_samples_split
    else:
        min_split = int(ceil(self.min_samples_split * X.shape[0]))

    depth = 0 if self.max_depth is None else self.max_depth
    impurity = (0.0 if self.min_impurity_split is None
                else self.min_impurity_split)
    leaf_nodes = 0 if self.max_leaf_nodes is None else self.max_leaf_nodes
    tree_fraction = bootstrap_fraction if self.bootstrap is True else 1.

    # out-of-bag results are requested from the training algorithm itself
    if self.oob_score:
        requested_results = (
            "computeOutOfBagErrorAccuracy|computeOutOfBagErrorDecisionFunction")
    else:
        requested_results = ""

    trainer = daal4py.decision_forest_classification_training(
        nClasses=int(n_classes),
        fptype=X_fptype,
        method='hist',
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=tree_fraction,
        featuresPerNode=int(features_per_node),
        maxTreeDepth=int(depth),
        minObservationsInLeafNode=min_leaf,
        engine=engine,
        impurityThreshold=float(impurity),
        varImportance="MDI",
        resultsToCompute=requested_results,
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap),
        minObservationsInSplitNode=min_split,
        minWeightFractionInLeafNode=self.min_weight_fraction_leaf,
        minImpurityDecreaseInSplitNode=self.min_impurity_decrease,
        maxLeafNodes=leaf_nodes,
        maxBins=self.maxBins,
        minBinSize=self.minBinSize,
    )

    self._cached_estimators_ = None

    training_result = trainer.compute(X, y, sample_weight)

    # keep the trained model on the estimator
    self.daal_model_ = training_result.model

    if self.oob_score:
        self.oob_score_ = training_result.outOfBagErrorAccuracy[0][0]
        self.oob_decision_function_ = \
            training_result.outOfBagErrorDecisionFunction
        # drop a trailing axis of length one
        if self.oob_decision_function_.shape[-1] == 1:
            self.oob_decision_function_ = \
                self.oob_decision_function_.squeeze(axis=-1)

    return self
def daal_fit(self, X, y):
    """Fit the regression forest with daal4py and mirror it as sklearn trees.

    Sets ``n_outputs_``, ``n_features_``, ``daal_model_`` and
    ``estimators_`` (one ``DecisionTreeRegressor`` per tree, whose ``tree_``
    is filled from ``daal4py.getTreeState``). When ``self.oob_score`` is
    truthy, ``_set_oob_score`` is invoked.

    Raises
    ------
    ValueError
        If ``oob_score`` is requested while ``bootstrap`` is disabled.

    Returns
    -------
    self
    """
    self._check_daal_supported_parameters()

    X = check_array(X, dtype=[np.double, np.single])
    y = np.atleast_1d(y)

    is_column_vector = y.ndim == 2 and y.shape[1] == 1
    if is_column_vector:
        warnings.warn(
            "A column-vector y was passed when a 1d array was"
            " expected. Please change the shape of y to "
            "(n_samples,), for example using ravel().",
            DataConversionWarning, stacklevel=2)

    y = check_array(y, ensure_2d=False, dtype=X.dtype)
    check_consistent_length(X, y)

    if y.ndim == 1:
        # np.reshape keeps the data contiguous, unlike [:, np.newaxis]
        y = np.reshape(y, (-1, 1))

    self.n_outputs_ = y.shape[1]
    self.n_features_ = X.shape[1]

    random_state = check_random_state(self.random_state)

    if self.oob_score and not self.bootstrap:
        raise ValueError("Out of bag estimation only available"
                         " if bootstrap=True")

    X_fptype = getFPType(X)
    seed = random_state.randint(0, np.iinfo('i').max)
    engine = daal4py.engines_mt2203(seed=seed, fptype=X_fptype)

    features_per_node = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=False)

    depth = 0 if self.max_depth is None else self.max_depth
    impurity = (0.0 if self.min_impurity_split is None
                else self.min_impurity_split)

    trainer = daal4py.decision_forest_regression_training(
        fptype=getFPType(X),
        method='defaultDense',
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=1,
        featuresPerNode=int(features_per_node),
        maxTreeDepth=int(depth),
        minObservationsInLeafNode=1,
        engine=engine,
        impurityThreshold=float(impurity),
        varImportance="MDI",
        resultsToCompute="",
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap),
    )

    training_result = trainer.compute(X, y)

    # keep the trained model on the estimator
    model = training_result.model
    self.daal_model_ = model

    # template estimator, cloned once per tree of the forest
    template = DecisionTreeRegressor(
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        min_impurity_split=self.min_impurity_split,
        random_state=None)

    # each clone gets a tree_ restored from the state daal4py reports
    fitted_trees = []
    for tree_index in range(self.n_estimators):
        tree_estimator = clone(template)
        tree_estimator.n_features_ = self.n_features_
        tree_estimator.n_outputs_ = self.n_outputs_

        state = daal4py.getTreeState(model, tree_index)
        state_dict = {
            'max_depth': state.max_depth,
            'node_count': state.node_count,
            'nodes': state.node_ar,
            'values': state.value_ar,
        }

        tree_estimator.tree_ = Tree(
            self.n_features_, np.array([1], dtype=np.intp), self.n_outputs_)
        tree_estimator.tree_.__setstate__(state_dict)
        fitted_trees.append(tree_estimator)

    self.estimators_ = fitted_trees

    # out-of-bag score, if requested
    if self.oob_score:
        self._set_oob_score(X, y)

    return self
def daal_fit(self, X, y):
    """Fit the classification forest with daal4py and mirror it as sklearn trees.

    Sets ``n_outputs_``, ``n_classes_``, ``classes_``, ``n_features_``,
    ``daal_model_`` and ``estimators_`` (one ``DecisionTreeClassifier`` per
    tree, whose ``tree_`` is filled from ``daal4py.getTreeState``). When
    ``self.oob_score`` is truthy, ``_set_oob_score`` is invoked.

    Raises
    ------
    ValueError
        If ``y`` has more than one output column, or if fewer than two
        classes are present.

    Returns
    -------
    self
    """
    self._check_daal_supported_parameters()
    _supported_dtypes_ = [np.single, np.double]
    X = check_array(X, dtype=_supported_dtypes_)
    y = np.atleast_1d(y)

    if y.ndim == 2 and y.shape[1] == 1:
        warnings.warn(
            "A column-vector y was passed when a 1d array was"
            " expected. Please change the shape of y to "
            "(n_samples,), for example using ravel().",
            DataConversionWarning, stacklevel=2)

    check_consistent_length(X, y)

    if y.ndim == 1:
        # reshape is necessary to preserve the data contiguity,
        # which [:, np.newaxis] does not.
        y = np.reshape(y, (-1, 1))

    self.n_outputs_ = y.shape[1]
    if self.n_outputs_ != 1:
        _class_name = self.__class__.__name__
        raise ValueError(
            _class_name
            + " does not currently support multi-output data. Consider using OneHotEncoder"
        )

    y = check_array(y, ensure_2d=False, dtype=None)
    y, _ = self._validate_y_class_weight(y)

    # keep only the first entry of the per-output class attributes, since a
    # single output is enforced above
    self.n_classes_ = self.n_classes_[0]
    self.classes_ = self.classes_[0]

    self.n_features_ = X.shape[1]

    rs_ = check_random_state(self.random_state)
    seed_ = rs_.randint(0, np.iinfo('i').max)

    if self.n_classes_ < 2:
        raise ValueError(
            "Training data only contain information about one class.")

    # create algorithm
    X_fptype = getFPType(X)
    daal_engine_ = daal4py.engines_mt2203(seed=seed_, fptype=X_fptype)
    # This is the classifier path, so max_features must be resolved with the
    # classification rules (the flag was previously passed as False).
    _featuresPerNode = _to_absolute_max_features(
        self.max_features, X.shape[1], is_classification=True)

    dfc_algorithm = daal4py.decision_forest_classification_training(
        nClasses=int(self.n_classes_),
        fptype=X_fptype,
        method='defaultDense',
        nTrees=int(self.n_estimators),
        observationsPerTreeFraction=1,
        featuresPerNode=int(_featuresPerNode),
        maxTreeDepth=int(0 if self.max_depth is None else self.max_depth),
        minObservationsInLeafNode=1,
        engine=daal_engine_,
        impurityThreshold=float(
            0.0 if self.min_impurity_split is None
            else self.min_impurity_split),
        varImportance="MDI",
        resultsToCompute="",
        memorySavingMode=False,
        bootstrap=bool(self.bootstrap))

    # compute
    dfc_trainingResult = dfc_algorithm.compute(X, y)

    # get resulting model
    model = dfc_trainingResult.model
    self.daal_model_ = model

    # convert model to estimators
    est = DecisionTreeClassifier(
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        max_features=self.max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        min_impurity_split=self.min_impurity_split,
        random_state=None)

    # est.tree_ must be set to Trees constructed from the Intel(R) DAAL
    # solution
    estimators_ = []
    for i in range(self.n_estimators):
        est_i = clone(est)
        est_i.n_features_ = self.n_features_
        est_i.n_outputs_ = self.n_outputs_
        est_i.classes_ = self.classes_
        est_i.n_classes_ = self.n_classes_

        # treeState members: 'class_count', 'leaf_count', 'max_depth',
        # 'node_ar', 'node_count', 'value_ar'
        tree_i_state_class = daal4py.getTreeState(model, i, self.n_classes_)
        tree_i_state_dict = {
            'max_depth': tree_i_state_class.max_depth,
            'node_count': tree_i_state_class.node_count,
            'nodes': tree_i_state_class.node_ar,
            'values': tree_i_state_class.value_ar,
        }

        # the Tree has to exist before its state can be restored
        est_i.tree_ = Tree(
            self.n_features_,
            np.array([self.n_classes_], dtype=np.intp),
            self.n_outputs_)
        est_i.tree_.__setstate__(tree_i_state_dict)
        estimators_.append(est_i)

    self.estimators_ = estimators_

    # compute oob_score_
    if self.oob_score:
        self._set_oob_score(X, y)

    return self