def test_rf_regressor_decision_path_leaf(self):
    """Compare ONNX prediction, decision-path and leaf outputs with scikit-learn.

    A small forest regressor is converted with both the ``decision_leaf`` and
    ``decision_path`` options enabled.  The first ONNX output is compared with
    ``predict``, the second (joined row by row into strings) with the
    stringified dense ``decision_path`` indicator, and the third with the
    leaves derived by ``path_to_leaf``.
    """
    forest = RandomForestRegressor(n_estimators=3, max_depth=3)
    features, target = make_regression(10, n_features=4, random_state=42)
    features = features[:, :2]
    forest.fit(features, target)

    input_spec = [('input', FloatTensorType((None, features.shape[1])))]
    per_model_options = {
        id(forest): {'decision_leaf': True, 'decision_path': True},
    }
    onnx_model = convert_sklearn(
        forest,
        initial_types=input_spec,
        options=per_model_options,
        target_opset=TARGET_OPSET,
    )

    session = InferenceSession(onnx_model.SerializeToString())
    outputs = session.run(None, {'input': features.astype(numpy.float32)})

    # Output 0: the regression values.
    expected_pred = forest.predict(features)
    assert_almost_equal(expected_pred, outputs[0].ravel(), decimal=4)

    # Outputs 1 and 2: decision path strings and leaves.
    indicator, n_nodes_ptr = forest.decision_path(features)
    exp_leaf = path_to_leaf(forest.estimators_, indicator.todense(), n_nodes_ptr)
    exp_path = binary_array_to_string(indicator.todense())
    got_path = numpy.array([''.join(chars) for chars in outputs[1]])
    assert exp_path == got_path.ravel().tolist()
    assert exp_leaf.tolist() == outputs[2].tolist()
def train_ctax_forest(self, max_depth):
    """Fit a random-forest regressor on the training split and predict the test split.

    Sets ``self.method`` to ``'regression forest'`` as a side effect.

    Parameters
    ----------
    max_depth :
        Forwarded unchanged to ``RandomForestRegressor(max_depth=...)``.

    Returns
    -------
    pred :
        The result of ``clf.predict(self.X_test)``.
    """
    # Note carried over from the original (translated): bootstrap methods;
    # split the dataset so that testing is also done on the test sets.
    self.method = 'regression forest'
    clf = RandomForestRegressor(random_state=0, max_depth=max_depth)
    clf.fit(self.X_train, self.y_train)
    # The previous version also evaluated clf.decision_path(self.X_train) and
    # discarded the result; that unused computation has been removed.
    return clf.predict(self.X_test)
def test_drf_regressor_backupsklearn(backend='auto'):
    """Fit h2o4gpu's and scikit-learn's random-forest regressors on the same data.

    Both models are always fitted.  Their predictions, scores, decision
    paths, leaf indices and fitted attributes are compared for exact equality
    only when ``backend == "sklearn"``.

    Parameters
    ----------
    backend : str, default='auto'
        Passed to ``h2o4gpu.RandomForestRegressor(backend=...)``.
    """
    # sep=r"\s+" is the supported spelling of the deprecated
    # delim_whitespace=True and parses the file identically.
    df = pd.read_csv("./open_data/simple.txt", sep=r"\s+")
    # Every column but the last is a feature; the last column is the target.
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')

    import h2o4gpu
    Solver = h2o4gpu.RandomForestRegressor

    # Run h2o4gpu version of RandomForest Regression
    drf = Solver(backend=backend, random_state=1234, oob_score=True)
    print("h2o4gpu fit()")
    drf.fit(X, y)

    # Run Sklearn version of RandomForest Regression
    from sklearn.ensemble import RandomForestRegressor
    drf_sk = RandomForestRegressor(random_state=1234, oob_score=True,
                                   max_depth=3)
    print("Scikit fit()")
    drf_sk.fit(X, y)

    if backend == "sklearn":
        assert (drf.predict(X) == drf_sk.predict(X)).all()
        # score() yields a scalar, so compare directly instead of calling
        # .all() on the comparison result (a plain bool has no .all()).
        assert drf.score(X, y) == drf_sk.score(X, y)
        assert (drf.decision_path(X)[1] == drf_sk.decision_path(X)[1]).all()
        assert (drf.apply(X) == drf_sk.apply(X)).all()

        print("Estimators")
        print(drf.estimators_)
        print(drf_sk.estimators_)

        print("n_features")
        print(drf.n_features_)
        print(drf_sk.n_features_)
        assert drf.n_features_ == drf_sk.n_features_

        print("n_outputs")
        print(drf.n_outputs_)
        print(drf_sk.n_outputs_)
        assert drf.n_outputs_ == drf_sk.n_outputs_

        print("Feature importance")
        print(drf.feature_importances_)
        print(drf_sk.feature_importances_)
        assert (drf.feature_importances_ == drf_sk.feature_importances_).all()

        print("oob_score")
        print(drf.oob_score_)
        print(drf_sk.oob_score_)
        assert drf.oob_score_ == drf_sk.oob_score_

        print("oob_prediction")
        print(drf.oob_prediction_)
        print(drf_sk.oob_prediction_)
        assert (drf.oob_prediction_ == drf_sk.oob_prediction_).all()
def test_drf_regressor_backupsklearn(backend='auto'):
    """Check that h2o4gpu's forest regressor matches scikit-learn's.

    The data set is read from ``./open_data/simple.txt``; both regressors are
    fitted on it.  The equality assertions (predictions, score, decision-path
    pointers, leaf indices and fitted attributes) are executed only when
    ``backend == "sklearn"``.

    Parameters
    ----------
    backend : str, default='auto'
        Passed to ``h2o4gpu.RandomForestRegressor(backend=...)``.
    """
    # delim_whitespace=True is deprecated in pandas; sep=r"\s+" is the
    # documented equivalent.
    df = pd.read_csv("./open_data/simple.txt", sep=r"\s+")
    last_col = df.shape[1] - 1
    # Columns [0, last_col) are the features, column last_col is the target.
    X = np.array(df.iloc[:, :last_col], dtype='float32', order='C')
    y = np.array(df.iloc[:, last_col], dtype='float32', order='C')

    import h2o4gpu
    Solver = h2o4gpu.RandomForestRegressor

    # Run h2o4gpu version of RandomForest Regression
    drf = Solver(backend=backend, random_state=1234, oob_score=True)
    print("h2o4gpu fit()")
    drf.fit(X, y)

    # Run Sklearn version of RandomForest Regression
    from sklearn.ensemble import RandomForestRegressor
    drf_sk = RandomForestRegressor(random_state=1234, oob_score=True,
                                   max_depth=3)
    print("Scikit fit()")
    drf_sk.fit(X, y)

    if backend == "sklearn":
        assert (drf.predict(X) == drf_sk.predict(X)).all()
        # Scalars are compared directly: the result of float == float is a
        # plain bool, which has no .all() method.
        assert drf.score(X, y) == drf_sk.score(X, y)
        assert (drf.decision_path(X)[1] == drf_sk.decision_path(X)[1]).all()
        assert (drf.apply(X) == drf_sk.apply(X)).all()

        print("Estimators")
        print(drf.estimators_)
        print(drf_sk.estimators_)

        print("n_features")
        print(drf.n_features_)
        print(drf_sk.n_features_)
        assert drf.n_features_ == drf_sk.n_features_

        print("n_outputs")
        print(drf.n_outputs_)
        print(drf_sk.n_outputs_)
        assert drf.n_outputs_ == drf_sk.n_outputs_

        print("Feature importance")
        print(drf.feature_importances_)
        print(drf_sk.feature_importances_)
        assert (drf.feature_importances_ == drf_sk.feature_importances_).all()

        print("oob_score")
        print(drf.oob_score_)
        print(drf_sk.oob_score_)
        assert drf.oob_score_ == drf_sk.oob_score_

        print("oob_prediction")
        print(drf.oob_prediction_)
        print(drf_sk.oob_prediction_)
        assert (drf.oob_prediction_ == drf_sk.oob_prediction_).all()
def test_randomforestregressor_decision_path(self):
    """Compare the ONNX ``decision_path`` output of a forest regressor with scikit-learn.

    The model is converted with the ``decision_path`` option; the first ONNX
    output is compared with ``predict`` and the second, joined row by row
    into strings, with the stringified dense ``decision_path`` indicator.
    """
    forest = RandomForestRegressor(max_depth=2, n_estimators=2)
    features, target = make_classification(10, n_features=4, random_state=42)
    features = features[:, :2]
    forest.fit(features, target)

    input_spec = [('input', FloatTensorType((None, features.shape[1])))]
    onnx_model = convert_sklearn(
        forest,
        initial_types=input_spec,
        options={id(forest): {'decision_path': True}},
    )

    session = InferenceSession(onnx_model.SerializeToString())
    outputs = session.run(None, {'input': features.astype(numpy.float32)})

    expected_pred = forest.predict(features)
    assert_almost_equal(expected_pred, outputs[0].ravel())

    indicator = forest.decision_path(features)[0]
    expected_paths = binary_array_to_string(indicator.todense())
    actual_paths = numpy.array([''.join(chars) for chars in outputs[1]])
    assert expected_paths == actual_paths.ravel().tolist()
def test_randomforestregressor_decision_path(self):
    """Compare ``OnnxInference`` results for a forest regressor with scikit-learn.

    The model is converted by ``to_onnx`` with the ``decision_path`` option.
    The ``'variable'`` result is compared with ``predict`` and the
    ``'decision_path'`` result, joined row by row into strings, with the
    stringified dense ``decision_path`` indicator.
    """
    forest = RandomForestRegressor(max_depth=2, n_estimators=2)
    features, target = make_classification(10, n_features=4, random_state=42)
    features = features[:, :2].astype(numpy.float32)
    forest.fit(features, target)

    conversion_options = {id(forest): {'decision_path': True}}
    onnx_model = to_onnx(forest, features, options=conversion_options)

    runtime = OnnxInference(onnx_model)
    results = runtime.run({'X': features})

    expected_pred = forest.predict(features)
    self.assertEqualArray(expected_pred, results['variable'].ravel())

    indicator = forest.decision_path(features)[0]
    expected_paths = binary_array_to_string(indicator.todense())
    actual_paths = numpy.array(
        [''.join(chars) for chars in results['decision_path']])
    self.assertEqual(expected_paths, actual_paths.tolist())
def fit_local(self, X, Y=None):
    """Fit a tree ensemble and derive the PCA-reduced "local space" from it.

    Parameters
    ----------
    X : matrix of shape = [n_samples, n_features]
        The feature matrix.
    Y : matrix of shape = [n_samples, n_outputs], optional
        The label/output matrix.  When omitted, the ensemble is fitted with
        ``X`` as its own target.

    Returns
    -------
    local :
        The fitted ensemble (``RandomForestRegressor`` when
        ``self.method == 'rf'``, ``ExtraTreesRegressor`` otherwise) carrying
        four extra attributes: ``cw`` (per-node weights), ``ind`` (indices of
        the retained node columns), ``pca`` (the fitted ``PCA(self.dim)``) and
        ``treepath`` (the PCA-transformed weighted decision paths of ``X``).
    """
    shared_kwargs = dict(
        n_estimators=self.n_est,
        max_features='sqrt',
        max_depth=None,
        min_samples_leaf=self.stop_crit,
        random_state=0,
    )
    if self.method == 'rf':
        local = RandomForestRegressor(**shared_kwargs)
        print("Basic model: Random Forest \n")
    else:
        local = ExtraTreesRegressor(**shared_kwargs)
        print("Basic model: Extremely Randomized Trees \n")

    unsupervised = Y is None
    local.fit(X, X if unsupervised else Y)
    print("Unsupervised learning \n" if unsupervised
          else "Supervised learning \n")

    # Node indicator of every training sample across all trees.
    indicator = local.decision_path(X)[0]
    # Number of samples passing through each node.
    node_counts = indicator.sum(0)
    # Weight of a node: inverse of (log of its sample count + 1e-5).
    log_counts = np.log(node_counts.astype(float)) + 0.00001
    local.cw = np.power(log_counts, -1)
    weighted_paths = indicator.multiply(local.cw).toarray().astype(float)

    # Keep only nodes visited by fewer than X.shape[0] * self.dw samples.
    local.ind = np.where(node_counts < (X.shape[0] * self.dw))[1]
    weighted_paths = weighted_paths[:, local.ind]

    local.pca = PCA(self.dim)
    local.treepath = local.pca.fit_transform(weighted_paths)
    return local
class WaveRandomForestRegressor(BaseEstimator, RegressorMixin):
    """Random-forest regressor that predicts from a subset of the tree nodes.

    A ``RandomForestRegressor`` is fitted, every node receives a norm and a
    value (``compute_node_norm_regression_forest``), and prediction sums the
    values of the retained nodes lying on each sample's decision path.

    See Paper: Wavelet decomposition of Random Forests
    http://www.jmlr.org/papers/volume17/15-203/15-203.pdf

    Parameters
    ----------
    nodes_to_keep : float, int or None, default=0.9
        ``None`` keeps every node.  A value below 1 is the fraction of nodes
        to keep; any other value is truncated with ``int`` and used as a
        node count.  Nodes are ranked by decreasing norm.

    All other parameters are stored and forwarded unchanged to
    ``RandomForestRegressor`` in :meth:`fit`.
    """

    def __init__(
        self,
        n_estimators=100,
        criterion="mse",
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features="auto",
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        min_impurity_split=None,
        bootstrap=True,
        oob_score=False,
        n_jobs=1,
        random_state=None,
        verbose=0,
        warm_start=False,
        nodes_to_keep=0.9,
    ):
        self.n_estimators = n_estimators
        self.criterion = criterion
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.min_impurity_split = min_impurity_split
        self.bootstrap = bootstrap
        self.oob_score = oob_score
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.verbose = verbose
        self.warm_start = warm_start
        self.nodes_to_keep = nodes_to_keep

        # None until fit() has been called; predict() relies on this.
        self.forest = None

    def fit(self, X, y):
        """Fit the underlying forest and select the nodes used for prediction.

        Returns
        -------
        self : object
        """
        # 1) create RandomForest
        self.forest = RandomForestRegressor(
            n_estimators=self.n_estimators,
            criterion=self.criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            min_impurity_split=self.min_impurity_split,
            bootstrap=self.bootstrap,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
            verbose=self.verbose,
            warm_start=self.warm_start,
        )

        # 2) fit it
        self.forest.fit(X, y)
        self.n_outputs_ = self.forest.n_outputs_

        # 3) retrieve node norms and values
        self.nodes_norm, self.nodes_value = compute_node_norm_regression_forest(
            self.forest)

        # 4) filter nodes: rank by decreasing norm, then keep the head
        self._nodes_order = np.argsort(-self.nodes_norm)
        self._ind_nodes_to_keep = self._select_nodes_to_keep()

        return self

    def _select_nodes_to_keep(self):
        """Return the indices of the retained nodes, or None to keep them all.

        Reads ``self.nodes_to_keep`` and ``self._nodes_order``.
        """
        if self.nodes_to_keep is None:
            return None
        if self.nodes_to_keep < 1:
            # Fraction of the total number of nodes.
            n_kept = int(len(self._nodes_order) * self.nodes_to_keep)
        else:
            n_kept = int(self.nodes_to_keep)
        return self._nodes_order[:n_kept]

    def _set_nodes_to_keep(self, nodes_to_keep):
        """Change the number of wavelets to keep without refitting the forest.

        When the model has not been fitted yet only the parameter is stored.
        """
        self.nodes_to_keep = nodes_to_keep
        if self.forest is not None:
            self._ind_nodes_to_keep = self._select_nodes_to_keep()

    def predict(self, X):
        """Predict from the retained nodes on each sample's decision path.

        Returns
        -------
        A 1-D array (column 0 of the single result) when ``nodes_value`` has
        size 1 along axis 1; otherwise a list with one array per index along
        that axis (presumably one per output - confirm against
        ``compute_node_norm_regression_forest``).

        Raises
        ------
        NotFittedError
            If :meth:`fit` has not been called.
        """
        if self.forest is None:
            raise NotFittedError("You should fit the model first")

        path, _ = self.forest.decision_path(X)

        if self._ind_nodes_to_keep is not None:
            # Slice the sparse indicator once instead of once per output.
            kept = self._ind_nodes_to_keep
            kept_path = path[:, kept]
            per_output = [
                kept_path.dot(self.nodes_value[kept, n, :])
                for n in range(self.nodes_value.shape[1])
            ]
        else:
            per_output = [
                path.dot(self.nodes_value[:, n, :])
                for n in range(self.nodes_value.shape[1])
            ]

        if len(per_output) == 1:
            return per_output[0][:, 0]
        return per_output
class _LinearForest(BaseEstimator):
    """Base class for Linear Forest meta-estimator.

    A linear model is fitted first; a ``RandomForestRegressor`` is then
    fitted on its residuals.

    Warning: This class should not be used directly. Use derived classes
    instead.
    """

    def __init__(self, base_estimator, *, n_estimators, max_depth,
                 min_samples_split, min_samples_leaf,
                 min_weight_fraction_leaf, max_features, max_leaf_nodes,
                 min_impurity_decrease, bootstrap, oob_score, n_jobs,
                 random_state, ccp_alpha, max_samples):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.bootstrap = bootstrap
        self.oob_score = oob_score
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples

    def _sigmoid(self, y):
        """Expit function (a.k.a. logistic sigmoid).

        Parameters
        ----------
        y : array-like of shape (n_samples, )
            The array to apply expit to element-wise.

        Returns
        -------
        y : array-like of shape (n_samples, )
            Expits.
        """
        # expit(y) = exp(-log(1 + exp(-y))).  The direct form
        # exp(y) / (1 + exp(y)) overflows to inf / inf = nan for large y;
        # logaddexp evaluates log(1 + exp(-y)) without overflowing.
        return np.exp(-np.logaddexp(0, np.negative(y)))

    def _inv_sigmoid(self, y):
        """Logit function.

        Parameters
        ----------
        y : array-like of shape (n_samples, )
            The array to apply logit to element-wise.

        Returns
        -------
        y : array-like of shape (n_samples, )
            Logits.
        """
        # Clip away from 0 and 1 so the logit stays finite.
        y = y.clip(1e-3, 1 - 1e-3)

        return np.log(y / (1 - y))

    def _fit(self, X, y, sample_weight=None):
        """Build a Linear Forest from the training set (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.

        y : array-like of shape (n_samples, ) or also (n_samples, n_targets)
            for multitarget regression.
            The target values (class labels in classification, real numbers
            in regression).

        sample_weight : array-like of shape (n_samples, ), default=None
            Sample weights.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If ``base_estimator`` has no ``fit_intercept`` attribute or is
            not a regressor.
        """
        if not hasattr(self.base_estimator, "fit_intercept"):
            raise ValueError(
                "Only linear models are accepted as base_estimator. "
                "Select one from linear_model class of scikit-learn.")

        if not is_regressor(self.base_estimator):
            raise ValueError(
                "Select a regressor linear model as base_estimator.")

        # Unpacking also enforces that X is two-dimensional.
        _, self.n_features_in_ = X.shape

        if hasattr(self, "classes_"):
            # Classification: map each label to its position in classes_ and
            # move the result to logit space before regressing on it.
            class_to_int = {
                label: idx for idx, label in enumerate(self.classes_)
            }
            y = np.array([class_to_int[i] for i in y])
            y = self._inv_sigmoid(y)

        self.base_estimator_ = deepcopy(self.base_estimator)
        self.base_estimator_.fit(X, y, sample_weight)
        # The forest models what the linear model could not explain.
        resid = y - self.base_estimator_.predict(X)

        criterion = "squared_error" if _sklearn_v1 else "mse"

        self.forest_estimator_ = RandomForestRegressor(
            n_estimators=self.n_estimators,
            criterion=criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            bootstrap=self.bootstrap,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
            ccp_alpha=self.ccp_alpha,
            max_samples=self.max_samples,
        )
        self.forest_estimator_.fit(X, resid, sample_weight)

        if hasattr(self.base_estimator_, "coef_"):
            self.coef_ = self.base_estimator_.coef_

        if hasattr(self.base_estimator_, "intercept_"):
            self.intercept_ = self.base_estimator_.intercept_

        self.feature_importances_ = self.forest_estimator_.feature_importances_

        return self

    def apply(self, X):
        """Apply trees in the forest to X, return leaf indices.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        X_leaves : ndarray of shape (n_samples, n_estimators)
            For each datapoint x in X and for each tree in the forest,
            return the index of the leaf x ends up in.
        """
        check_is_fitted(self, attributes="base_estimator_")

        return self.forest_estimator_.apply(X)

    def decision_path(self, X):
        """Return the decision path in the forest.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        indicator : sparse matrix of shape (n_samples, n_nodes)
            Return a node indicator matrix where non zero elements
            indicates that the samples goes through the nodes. The
            matrix is of CSR format.

        n_nodes_ptr : ndarray of shape (n_estimators + 1, )
            The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]
            gives the indicator value for the i-th estimator.
        """
        check_is_fitted(self, attributes="base_estimator_")

        return self.forest_estimator_.decision_path(X)
class QuantileRandomForestRegressor:
    """A quantile random forest regressor based on the scikit-learn RandomForestRegressor

    A wrapper around the RandomForestRegressor which summarizes based on
    quantiles rather than the mean. Note that quantile predictions take much
    longer than mean predictions.

    Parameters
    ----------
    nthreads : int, default=1
        number of threads to use; stored as the forest's ``n_jobs`` and
        passed to ``set_num_threads``
    rf_kwargs : keyword arguments
        kwargs to be passed to the RandomForestRegressor

    See Also
    --------
    https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html?highlight=randomforestregressor#sklearn.ensemble.RandomForestRegressor.apply
    """

    def __init__(self, nthreads=1, **rf_kwargs):
        # nthreads always overrides any n_jobs given in rf_kwargs.
        rf_kwargs['n_jobs'] = nthreads
        self.forest = RandomForestRegressor(**rf_kwargs)
        set_num_threads(nthreads)

    def fit(self, X, y, sample_weight=None):
        """
        Build a forest of trees from the training set (X, y).

        Copies of ``X`` and ``y`` are kept on the instance because the
        quantile and sampling predictions need the training targets.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Internally, its dtype will be converted
            to ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csc_matrix``.
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted. Splits
            that would create child nodes with net zero or negative weight are
            ignored while searching for a split in each node. In the case of
            classification, splits are also ignored if they would result in any
            single class carrying a negative weight in either child node.

        Returns
        -------
        self : object
        """
        self.forest.fit(X, y, sample_weight)
        self.trainy = y.copy()
        self.trainX = X.copy()
        # Return the estimator, as documented (previously returned None).
        return self

    def _leaf_ids(self, X):
        """Return per-tree leaf indices of the training data and of ``X``.

        Both results are float arrays with one column per tree, of shape
        (n_train, n_trees) and (n_pred, n_trees) respectively.
        """
        ntrees = self.forest.n_estimators
        ntrain = self.trainy.shape[0]
        train_tree_node_ID = np.zeros([ntrain, ntrees])
        npred = X.shape[0]
        pred_tree_node_ID = np.zeros([npred, ntrees])

        for i in range(ntrees):
            tree = self.forest.estimators_[i]
            train_tree_node_ID[:, i] = tree.apply(self.trainX)
            pred_tree_node_ID[:, i] = tree.apply(X)

        return train_tree_node_ID, pred_tree_node_ID

    def predict(self, X, qntl):
        """
        Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        quantile predicted regression targets of the trees in the forest.
        Note: Not possible for multioutput regression.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        qntl : {array-like} of shape (n_quantiles)
            Quantile or sequence of quantiles to compute, which must be between
            0 and 1 inclusive. Passed to numpy.quantile.

        Returns
        -------
        y : ndarray of shape (n_samples, n_quantiles)
            The predicted values.

        Raises
        ------
        RuntimeError
            If the model was fitted on a multi-dimensional target.
        """
        if len(self.trainy.shape) > 1:
            raise RuntimeError(
                "Quantile prediction is not possible with multioutput regression.")
        qntl = np.asanyarray(qntl)
        train_tree_node_ID, pred_tree_node_ID = self._leaf_ids(X)

        ypred_pcts = find_quant(self.trainy, train_tree_node_ID,
                                pred_tree_node_ID, qntl)

        return ypred_pcts

    def predict_sample(self, X, n_draws):
        """
        Predict regression target for X.

        The predicted regression target of an input sample is computed as a
        random sample of the predicted regression targets of the trees in the
        forest.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        n_draws : {int}
            number of samples to draw from the predicted regression targets

        Returns
        -------
        y : ndarray of shape (n_samples, n_draws) or (n_samples, n_outputs, n_draws)
            The predicted values.
        """
        train_tree_node_ID, pred_tree_node_ID = self._leaf_ids(X)

        ypred_draws = find_sample(self.trainy, train_tree_node_ID,
                                  pred_tree_node_ID, n_draws)

        return ypred_draws

    def apply(self, X):
        """
        wrapper for sklearn.ensemble.RandomForestRegressor.apply

        Apply trees in the forest to X, return leaf indices.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.

        Returns
        -------
        X_leaves : ndarray of shape (n_samples, n_estimators)
            For each datapoint x in X and for each tree in the forest,
            return the index of the leaf x ends up in.
        """
        return self.forest.apply(X)

    def decision_path(self, X):
        """
        wrapper for sklearn.ensemble.RandomForestRegressor.decision_path

        Return the decision path in the forest.

        .. versionadded:: 0.18

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.

        Returns
        -------
        indicator : sparse matrix of shape (n_samples, n_nodes)
            Return a node indicator matrix where non zero elements indicates
            that the samples goes through the nodes. The matrix is of CSR
            format.
        n_nodes_ptr : ndarray of shape (n_estimators + 1,)
            The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]
            gives the indicator value for the i-th estimator.
        """
        return self.forest.decision_path(X)

    def set_params(self, **params):
        """
        wrapper for sklearn.ensemble.RandomForestRegressor.set_params

        Set the parameters of the wrapped forest.

        The method works on simple estimators as well as on nested objects
        (such as pipelines). The latter have parameters of the form
        ``component__parameter`` so that it's possible to update each
        component of a nested object.

        Parameters
        ----------
        **params : dict
            Estimator parameters.

        Returns
        -------
        self : object
            Estimator instance.
        """
        # The original spelled this ``self.forestset_params`` (missing dot),
        # which raised AttributeError on every call.
        self.forest.set_params(**params)
        return self
def RandomForest_regression(self):
    """Fit a 1000-tree, depth-10 forest and store train/validation predictions.

    ``RFR`` is defined outside this block (presumably an alias of
    scikit-learn's ``RandomForestRegressor`` - confirm at the import site).

    Side effects
    ------------
    Sets ``self.y_pre_train`` and ``self.y_pre_valid`` to the model's
    predictions for ``self.train_X`` and ``self.valid_X``.
    """
    model = RFR(n_estimators=1000, max_depth=10)
    model.fit(self.train_X, self.train_y)
    # The previous version also computed model.decision_path(self.train_X)
    # into an unused local; the dead computation has been removed.
    self.y_pre_train = model.predict(self.train_X)
    self.y_pre_valid = model.predict(self.valid_X)
# coding:utf-8
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression

# Use the random-forest regression algorithm to compute predictions.

X, Y = make_regression(
    n_features=4,
    n_informative=2,
    random_state=0,
    shuffle=False,
)

regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X, Y)
# Parameters used by default (as recorded by the original author):
# RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
#            max_features='auto', max_leaf_nodes=None,
#            min_impurity_decrease=0.0, min_impurity_split=None,
#            min_samples_leaf=1, min_samples_split=2,
#            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
#            oob_score=False, random_state=0, verbose=0, warm_start=False)

# Relative importance of each of the four features.
print(regr.feature_importances_)

# Predictions for two hand-written samples.
print(regr.predict([[0, 0, 0, 0], [1, 1, 1, 1]]))

# Type of the second element returned by decision_path.
print(type(regr.decision_path(X)[1]))
# Fitted attributes of the model.
# `n_features_` no longer exists on current scikit-learn estimators; prefer
# `n_features_in_` and fall back to the old name on releases that lack it.
if hasattr(regr, "n_features_in_"):
    num_features = regr.n_features_in_  # the number of features
else:
    num_features = regr.n_features_  # the number of features
num_outputs = regr.n_outputs_  # the number of outputs when the model is built

# The two attributes below are only available when the model is built with
# oob_score=True, hence they stay disabled here.
# oob_score = regr.oob_score_  # out-of-bag score of the training dataset,
#     i.e. the coefficient of determination (R**2) computed on samples each
#     tree did not see while being built
# oob_predict = regr.oob_prediction_  # out-of-bag predictions for the
#     training dataset

# Now having a look at the methods.
# Indices of the leaves the test dataset ends up in, one column per tree.
leaf_indices = regr.apply(x_test)
# (node indicator matrix, per-tree column offsets) for the test dataset.
decision_path = regr.decision_path(x_test)
# The parameters of the model.
parameters = regr.get_params()
# Running the test dataset through the model, giving an array of predicted
# values.
predicted_age_array = regr.predict(x_test)
# R squared of the train dataset.
r_2_train = regr.score(x_train, y_train)
# R squared of the test dataset.
r_2_test = regr.score(x_test, y_test)
# Called without arguments, so no parameter is changed; the call returns the
# estimator itself.
set_params = regr.set_params()

# print the R squared
"node %s." % (node_depth[i] * "\t", i, children_left[i], feature[i], threshold[i], children_right[i], )) print() # First let's retrieve the decision path of each sample. The decision_path # method allows to retrieve the node indicator functions. A non zero element of # indicator matrix at the position (i, j) indicates that the sample i goes # through the node j. node_indicator = estimator.decision_path(X_test) # Similarly, we can also have the leaves ids reached by each sample. leave_id = estimator.apply(X_test) # Now, it's possible to get the tests that were used to predict a sample or # a group of samples. First, let's make it for the sample. sample_id = 0 node_index = node_indicator.indices[node_indicator.indptr[sample_id]: node_indicator.indptr[sample_id + 1]] print('Rules used to predict sample %s: ' % sample_id) for node_id in node_index: if leave_id[sample_id] == node_id:
def RandomForest_regression(self):
    """Fit a 1000-tree, depth-4 forest and store train/test predictions.

    ``RFR`` is defined outside this block (presumably an alias of
    scikit-learn's ``RandomForestRegressor`` - confirm at the import site).

    Side effects
    ------------
    Sets ``self.y_pre_train`` and ``self.y_pre_test`` to the model's
    predictions for ``self.train_X`` and ``self.test_X``.
    """
    rfr = RFR(n_estimators=1000, max_depth=4)
    rfr.fit(self.train_X, self.train_y)
    # The previous version also computed rfr.decision_path(self.train_X)
    # into an unused local; the dead computation has been removed.
    self.y_pre_train = rfr.predict(self.train_X)
    self.y_pre_test = rfr.predict(self.test_X)