def test_rf_regressor_decision_path_leaf(self):
    model = RandomForestRegressor(n_estimators=3, max_depth=3)
    X, y = make_regression(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    initial_types = [('input', FloatTensorType((None, X.shape[1])))]
    model_onnx = convert_sklearn(model,
                                 initial_types=initial_types,
                                 options={
                                     id(model): {
                                         'decision_leaf': True,
                                         'decision_path': True
                                     }
                                 },
                                 target_opset=TARGET_OPSET)
    sess = InferenceSession(model_onnx.SerializeToString())
    res = sess.run(None, {'input': X.astype(numpy.float32)})
    pred = model.predict(X)
    assert_almost_equal(pred, res[0].ravel(), decimal=4)
    dec = model.decision_path(X)
    exp_leaf = path_to_leaf(model.estimators_, dec[0].todense(), dec[1])
    exp_path = binary_array_to_string(dec[0].todense())
    got_path = numpy.array([''.join(row) for row in res[1]])
    assert exp_path == got_path.ravel().tolist()
    assert exp_leaf.tolist() == res[2].tolist()
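# The helpers binary_array_to_string and path_to_leaf come from the skl2onnx
# test utilities. For running the snippet standalone, a minimal sketch of
# plausible stand-ins (the library's actual implementations may differ):
import numpy


def binary_array_to_string(mat):
    # render each dense 0/1 node-indicator row as a string such as '10110...'
    mat = numpy.asarray(mat).astype(int)
    return [''.join(str(v) for v in row) for row in mat]


def path_to_leaf(estimators, dense_path, n_nodes_ptr):
    # for every sample and tree, return the index (within that tree's column
    # block) of the last node visited, i.e. the leaf
    dense_path = numpy.asarray(dense_path)
    leaves = numpy.zeros((dense_path.shape[0], len(estimators)), dtype=numpy.int64)
    for i in range(len(estimators)):
        block = dense_path[:, n_nodes_ptr[i]:n_nodes_ptr[i + 1]]
        leaves[:, i] = [int(numpy.nonzero(row)[0].max()) for row in block]
    return leaves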
Example #2
    def train_ctax_forest(self, max_depth):
        """
        Regression trees
        """

        # bootstrap methods: split the dataset so you also test with your test sets
        self.method = 'regression forest'

        clf = RandomForestRegressor(random_state=0, max_depth=max_depth)

        clf.fit(self.X_train, self.y_train)

        pred = clf.predict(self.X_test)

        clf.decision_path(self.X_train)
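        # note: the node-indicator result above is discarded; nothing in this
        # method uses it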

        return pred
Example #3
def test_drf_regressor_backupsklearn(backend='auto'):
    df = pd.read_csv("./open_data/simple.txt", delim_whitespace=True)
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    import h2o4gpu
    Solver = h2o4gpu.RandomForestRegressor

    # Run the h2o4gpu version of RandomForest regression
    drf = Solver(backend=backend, random_state=1234, oob_score=True)
    print("h2o4gpu fit()")
    drf.fit(X, y)

    # Run the scikit-learn version of RandomForest regression
    from sklearn.ensemble import RandomForestRegressor
    drf_sk = RandomForestRegressor(random_state=1234,
                                   oob_score=True,
                                   max_depth=3)
    print("Scikit fit()")
    drf_sk.fit(X, y)

    if backend == "sklearn":
        assert (drf.predict(X) == drf_sk.predict(X)).all()
        # score() returns a scalar, so compare it directly
        assert drf.score(X, y) == drf_sk.score(X, y)
        assert (drf.decision_path(X)[1] == drf_sk.decision_path(X)[1]).all()
        assert (drf.apply(X) == drf_sk.apply(X)).all()

        print("Estimators")
        print(drf.estimators_)
        print(drf_sk.estimators_)

        print("n_features")
        print(drf.n_features_)
        print(drf_sk.n_features_)
        assert drf.n_features_ == drf_sk.n_features_

        print("n_outputs")
        print(drf.n_outputs_)
        print(drf_sk.n_outputs_)
        assert drf.n_outputs_ == drf_sk.n_outputs_

        print("Feature importance")
        print(drf.feature_importances_)
        print(drf_sk.feature_importances_)
        assert (drf.feature_importances_ == drf_sk.feature_importances_).all()

        print("oob_score")
        print(drf.oob_score_)
        print(drf_sk.oob_score_)
        assert drf.oob_score_ == drf_sk.oob_score_

        print("oob_prediction")
        print(drf.oob_prediction_)
        print(drf_sk.oob_prediction_)
        assert (drf.oob_prediction_ == drf_sk.oob_prediction_).all()
Example #4
def test_randomforestregressor_decision_path(self):
    model = RandomForestRegressor(max_depth=2, n_estimators=2)
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    initial_types = [('input', FloatTensorType((None, X.shape[1])))]
    model_onnx = convert_sklearn(
        model, initial_types=initial_types,
        options={id(model): {'decision_path': True}})
    sess = InferenceSession(model_onnx.SerializeToString())
    res = sess.run(None, {'input': X.astype(numpy.float32)})
    pred = model.predict(X)
    assert_almost_equal(pred, res[0].ravel())
    dec = model.decision_path(X)
    exp = binary_array_to_string(dec[0].todense())
    got = numpy.array([''.join(row) for row in res[1]])
    assert exp == got.ravel().tolist()
Example #5
def test_randomforestregressor_decision_path(self):
    model = RandomForestRegressor(max_depth=2, n_estimators=2)
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2].astype(numpy.float32)
    model.fit(X, y)
    model_onnx = to_onnx(model,
                         X,
                         options={id(model): {
                                      'decision_path': True
                                  }})
    sess = OnnxInference(model_onnx)
    res = sess.run({'X': X})
    pred = model.predict(X)
    self.assertEqualArray(pred, res['variable'].ravel())
    dec = model.decision_path(X)
    exp = binary_array_to_string(dec[0].todense())
    got = numpy.array([''.join(row) for row in res['decision_path']])
    self.assertEqual(exp, got.tolist())
Example #6
    def fit_local(self, X, Y=None):
        """Fitting and generating the local space.
    
        Parameters
        ----------
        X : matrix of shape = [n_samples, n_features]
        (i.e., the feature matrix)

        Y : matrix of shape = [n_samples, n_outputs]
        (i.e., the label/output matrix)
        """
        
        if self.method == 'rf':
            local = RandomForestRegressor(n_estimators=self.n_est, max_features='sqrt',
                                          max_depth=None, min_samples_leaf=self.stop_crit,
                                          random_state=0)
            print("Basic model: Random Forest \n")
        else:
            local = ExtraTreesRegressor(n_estimators=self.n_est, max_features='sqrt',
                                        max_depth=None, min_samples_leaf=self.stop_crit,
                                        random_state=0)
            print("Basic model: Extremely Randomized Trees \n")

        if Y is None:
            local.fit(X, X)
            print("Unsupervised learning \n")
        else:
            local.fit(X, Y)
            print("Supervised learning \n")

        treepath = local.decision_path(X)[0]
        w = treepath.sum(0)
        wlog = np.log(w.astype(float)) + 0.00001
        local.cw = np.power(wlog, -1)
        treepath = treepath.multiply(local.cw).toarray().astype(float)
#        treepath = treepath.toarray().astype(float)
        local.ind = np.where(w < (X.shape[0] * self.dw))[1]
#        treepath = np.delete(treepath, local.ind, axis=1)
        treepath = treepath[:, local.ind]
        
        local.pca = PCA(self.dim)
        local.treepath = local.pca.fit_transform(treepath)
        
        return local          
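# The embedding built above weights each node column of the decision_path
# indicator by the inverse log of its occupancy, keeps only the rarely visited
# (deeper, more specific) columns, and compresses with PCA. A self-contained
# sketch of that pipeline with stand-in data (threshold and dimensions are
# illustrative, not this class's defaults):
import numpy as np
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor

X = np.random.RandomState(0).rand(200, 8)
forest = RandomForestRegressor(n_estimators=50, random_state=0).fit(X, X[:, 0])

treepath = forest.decision_path(X)[0]                  # (n_samples, n_nodes) CSR
w = np.asarray(treepath.sum(0), dtype=float).ravel()   # per-node occupancy
weights = np.power(np.log(w) + 1e-5, -1)               # inverse-log weighting
dense = treepath.multiply(weights.reshape(1, -1)).toarray()
rare = np.where(w < X.shape[0] * 0.05)[0]              # rarely visited nodes
embedding = PCA(10).fit_transform(dense[:, rare])
print(embedding.shape)                                 # (200, 10)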
Example #7
class WaveRandomForestRegressor(BaseEstimator, RegressorMixin):
    """
    RandomForest based classifier but with nodes that are removed
    
    See Paper:
    Wavelet decomposition of Random Forests
    http://www.jmlr.org/papers/volume17/15-203/15-203.pdf
    """
    def __init__(
        self,
        n_estimators=100,
        criterion="mse",
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features="auto",
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        min_impurity_split=None,
        bootstrap=True,
        oob_score=False,
        n_jobs=1,
        random_state=None,
        verbose=0,
        warm_start=False,
        nodes_to_keep=0.9,
    ):

        self.n_estimators = n_estimators
        self.criterion = criterion
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.min_impurity_split = min_impurity_split
        self.bootstrap = bootstrap
        self.oob_score = oob_score
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.verbose = verbose
        self.warm_start = warm_start
        self.nodes_to_keep = nodes_to_keep

        self.forest = None

    def fit(self, X, y):

        # 1) create RandomForest
        self.forest = RandomForestRegressor(
            n_estimators=self.n_estimators,
            criterion=self.criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            min_impurity_split=self.min_impurity_split,
            bootstrap=self.bootstrap,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
            verbose=self.verbose,
            warm_start=self.warm_start,
        )

        # 2) fit it
        self.forest.fit(X, y)

        self.n_outputs_ = self.forest.n_outputs_

        # 3) retrieve node norms and values
        self.nodes_norm, self.nodes_value = compute_node_norm_regression_forest(
            self.forest)

        # 4) filter nodes
        self._nodes_order = np.argsort(-self.nodes_norm)

        if self.nodes_to_keep is not None:
            if self.nodes_to_keep < 1:
                nodes_to_keep = int(
                    len(self._nodes_order) * self.nodes_to_keep)
            else:
                nodes_to_keep = int(self.nodes_to_keep)

            self._ind_nodes_to_keep = self._nodes_order[:nodes_to_keep]
        else:
            self._ind_nodes_to_keep = None

        return self

    def _set_nodes_to_keep(self, nodes_to_keep):
        """ change the number of wavelets to keep without refitting the underlying random forest """
        self.nodes_to_keep = nodes_to_keep

        if self.forest is not None:

            if self.nodes_to_keep is None:
                self._ind_nodes_to_keep = None

            else:
                if self.nodes_to_keep < 1:
                    nodes_to_keep = int(
                        len(self._nodes_order) * self.nodes_to_keep)
                else:
                    nodes_to_keep = int(self.nodes_to_keep)

                self._ind_nodes_to_keep = self._nodes_order[:nodes_to_keep]

    def predict(self, X):

        if self.forest is None:
            raise NotFittedError("You should fit the model first")

        path, _ = self.forest.decision_path(X)

        if self._ind_nodes_to_keep is not None:
            predict_proba_filtered = [
                path[:, self._ind_nodes_to_keep].dot(
                    self.nodes_value[self._ind_nodes_to_keep, n, :])
                for n in range(self.nodes_value.shape[1])
            ]
        else:
            predict_proba_filtered = [
                path[:, :].dot(self.nodes_value[:, n, :])
                for n in range(self.nodes_value.shape[1])
            ]

        if len(predict_proba_filtered) == 1:
            return predict_proba_filtered[0][:, 0]
        else:
            return predict_proba_filtered
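# A hedged usage sketch of the class above; compute_node_norm_regression_forest
# is assumed importable from the same module, and the criterion="mse" default
# targets older scikit-learn releases (newer ones spell it "squared_error").
import numpy as np
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=5, noise=5.0, random_state=0)

# keep only the 50% of nodes with the largest wavelet norms
model = WaveRandomForestRegressor(n_estimators=20, nodes_to_keep=0.5,
                                  random_state=0)
model.fit(X, y)
print(model.predict(X)[:3])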
Example #8
class _LinearForest(BaseEstimator):
    """Base class for Linear Forest meta-estimator.

    Warning: This class should not be used directly. Use derived classes
    instead.
    """
    def __init__(self, base_estimator, *, n_estimators, max_depth,
                 min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
                 max_features, max_leaf_nodes, min_impurity_decrease,
                 bootstrap, oob_score, n_jobs, random_state, ccp_alpha,
                 max_samples):

        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.bootstrap = bootstrap
        self.oob_score = oob_score
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples

    def _sigmoid(self, y):
        """Expit function (a.k.a. logistic sigmoid).

        Parameters
        ----------
        y : array-like of shape (n_samples, )
            The array to apply expit to element-wise.

        Returns
        -------
        y : array-like of shape (n_samples, )
            Expits.
        """
        return np.exp(y) / (1 + np.exp(y))

    def _inv_sigmoid(self, y):
        """Logit function.

        Parameters
        ----------
        y : array-like of shape (n_samples, )
            The array to apply logit to element-wise.

        Returns
        -------
        y : array-like of shape (n_samples, )
            Logits.
        """
        y = y.clip(1e-3, 1 - 1e-3)

        return np.log(y / (1 - y))

    def _fit(self, X, y, sample_weight=None):
        """Build a Linear Boosting from the training set (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.

        y : array-like of shape (n_samples, ) or (n_samples, n_targets) for
            multi-target regression.
            The target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples, ), default=None
            Sample weights.

        Returns
        -------
        self : object
        """
        if not hasattr(self.base_estimator, "fit_intercept"):
            raise ValueError(
                "Only linear models are accepted as base_estimator. "
                "Select one from linear_model class of scikit-learn.")

        if not is_regressor(self.base_estimator):
            raise ValueError(
                "Select a regressor linear model as base_estimator.")

        n_sample, self.n_features_in_ = X.shape

        if hasattr(self, "classes_"):
            class_to_int = dict(map(reversed, enumerate(self.classes_)))
            y = np.array([class_to_int[i] for i in y])
            y = self._inv_sigmoid(y)

        self.base_estimator_ = deepcopy(self.base_estimator)
        self.base_estimator_.fit(X, y, sample_weight)
        resid = y - self.base_estimator_.predict(X)

        criterion = "squared_error" if _sklearn_v1 else "mse"

        self.forest_estimator_ = RandomForestRegressor(
            n_estimators=self.n_estimators,
            criterion=criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            bootstrap=self.bootstrap,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
            ccp_alpha=self.ccp_alpha,
            max_samples=self.max_samples,
        )
        self.forest_estimator_.fit(X, resid, sample_weight)

        if hasattr(self.base_estimator_, "coef_"):
            self.coef_ = self.base_estimator_.coef_

        if hasattr(self.base_estimator_, "intercept_"):
            self.intercept_ = self.base_estimator_.intercept_

        self.feature_importances_ = self.forest_estimator_.feature_importances_

        return self

    def apply(self, X):
        """Apply trees in the forest to X, return leaf indices.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        X_leaves : ndarray of shape (n_samples, n_estimators)
            For each datapoint x in X and for each tree in the forest,
            return the index of the leaf x ends up in.
        """
        check_is_fitted(self, attributes="base_estimator_")

        return self.forest_estimator_.apply(X)

    def decision_path(self, X):
        """Return the decision path in the forest.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        indicator : sparse matrix of shape (n_samples, n_nodes)
            A node indicator matrix where a non-zero element at (i, j)
            indicates that sample i goes through node j. The matrix is in CSR
            format.

        n_nodes_ptr : ndarray of shape (n_estimators + 1, )
            The columns indicator[:, n_nodes_ptr[i]:n_nodes_ptr[i+1]]
            give the indicator values for the i-th estimator.
        """
        check_is_fitted(self, attributes="base_estimator_")

        return self.forest_estimator_.decision_path(X)
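# The _fit above is the "linear forest" recipe: fit the linear base estimator,
# then fit a random forest on its residuals, so a prediction is the linear
# trend plus a forest correction. A minimal standalone sketch of the recipe
# (not this class's API):
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge

X, y = make_regression(n_samples=300, n_features=6, noise=5.0, random_state=0)

linear = Ridge().fit(X, y)
resid = y - linear.predict(X)                   # what the linear part misses
forest = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, resid)

y_hat = linear.predict(X) + forest.predict(X)   # linear trend + forest correction
print(float(np.mean((y - y_hat) ** 2)))         # in-sample MSE of the combo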
Example #9
class QuantileRandomForestRegressor:
    """A quantile random forest regressor based on the scikit-learn RandomForestRegressor.

    A wrapper around RandomForestRegressor that summarizes by quantiles rather
    than the mean. Note that quantile predictions take much longer than mean
    predictions.

    Parameters
    ----------
    nthreads : int, default=1
        number of threads to use
    rf_kwargs : dict
        keyword arguments to be passed to the RandomForestRegressor

    See Also
    --------
    https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html?highlight=randomforestregressor#sklearn.ensemble.RandomForestRegressor.apply
    """
    def __init__(self, nthreads=1, **rf_kwargs):
        rf_kwargs['n_jobs'] = nthreads
        self.forest = RandomForestRegressor(**rf_kwargs)
        set_num_threads(nthreads)

    def fit(self, X, y, sample_weight=None):
        """
        Build a forest of trees from the training set (X, y).
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Internally, its dtype will be converted
            to ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csc_matrix``.
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted. Splits
            that would create child nodes with net zero or negative weight are
            ignored while searching for a split in each node. In the case of
            classification, splits are also ignored if they would result in any
            single class carrying a negative weight in either child node.
        Returns
        -------
        self : object
        """
        self.forest.fit(X, y, sample_weight)
        self.trainy = y.copy()
        self.trainX = X.copy()
        return self

    def predict(self, X, qntl):
        """
        Predict regression target for X.
        The predicted regression target of an input sample is computed as
        quantiles of the predicted regression targets of the trees in the
        forest.

        Note: not possible for multioutput regression.
        
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        qntl : {array-like} of shape (n_quantiles)
            Quantile or sequence of quantiles to compute, which must be between
            0 and 1 inclusive. Passed to numpy.quantile.
        Returns
        -------
        y : ndarray of shape (n_samples, n_quantiles)
            The predicted values.
        """
        if len(self.trainy.shape) > 1:
            raise RuntimeError(
                "Quantile prediction is not possible with multioutput regression.")
        
        qntl = np.asanyarray(qntl)
        ntrees = self.forest.n_estimators
        ntrain = self.trainy.shape[0]
        train_tree_node_ID = np.zeros([ntrain, ntrees])
        npred = X.shape[0]
        pred_tree_node_ID = np.zeros([npred, ntrees])

        for i in range(ntrees):
            train_tree_node_ID[:, i] = self.forest.estimators_[i].apply(self.trainX)
            pred_tree_node_ID[:, i] = self.forest.estimators_[i].apply(X)

        ypred_pcts = find_quant(self.trainy, train_tree_node_ID,
                                pred_tree_node_ID, qntl)

        return ypred_pcts
    
    def predict_sample(self, X, n_draws):
        """
        Predict regression target for X.
        The predicted regression target of an input sample is computed as a
        random sample of the predicted regression targets of the trees in the forest.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        n_draws : int
            number of draws to sample from the predicted regression targets
        Returns
        -------
        y : ndarray of shape (n_samples, n_draws) or (n_samples, n_outputs, n_draws)
            The predicted values.
        """
        ntrees = self.forest.n_estimators
        ntrain = self.trainy.shape[0]
        train_tree_node_ID = np.zeros([ntrain, ntrees])
        npred = X.shape[0]
        pred_tree_node_ID = np.zeros([npred, ntrees])

        for i in range(ntrees):
            train_tree_node_ID[:, i] = self.forest.estimators_[i].apply(self.trainX)
            pred_tree_node_ID[:, i] = self.forest.estimators_[i].apply(X)

        ypred_draws = find_sample(self.trainy, train_tree_node_ID,
                                pred_tree_node_ID, n_draws)

        return ypred_draws

    def apply(self, X):
        """
        wrapper for sklearn.ensemble.RandomForestRegressor.apply

        Apply trees in the forest to X, return leaf indices.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        Returns
        -------
        X_leaves : ndarray of shape (n_samples, n_estimators)
            For each datapoint x in X and for each tree in the forest,
            return the index of the leaf x ends up in.
        """
        return self.forest.apply(X)

    def decision_path(self, X):
        """
        wrapper for sklearn.ensemble.RandomForestRegressor.decision_path

        Return the decision path in the forest.
        .. versionadded:: 0.18
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        Returns
        -------
        indicator : sparse matrix of shape (n_samples, n_nodes)
            A node indicator matrix where a non-zero element at (i, j)
            indicates that sample i goes through node j. The matrix is in CSR
            format.
        n_nodes_ptr : ndarray of shape (n_estimators + 1,)
            The columns indicator[:, n_nodes_ptr[i]:n_nodes_ptr[i+1]]
            give the indicator values for the i-th estimator.
        """
        return self.forest.decision_path(X)

    def set_params(self, **params):
        """
        wrapper for sklearn.ensemble.RandomForestRegressor.set_params

        Set the parameters of this estimator.
        The method works on simple estimators as well as on nested objects
        (such as pipelines). The latter have parameters of the form
        ``<component>__<parameter>`` so that it's possible to update each
        component of a nested object.
        Parameters
        ----------
        **params : dict
            Estimator parameters.
        Returns
        -------
        self : object
            Estimator instance.
        """
        return self.forest.set_params(**params)
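# The quantile machinery under predict(): route each test sample to its
# per-tree leaf, pool the training targets sharing those leaves, and take
# quantiles of the pool. A standalone sketch with plain scikit-learn (no
# find_quant needed):
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=200, n_features=4, noise=10.0, random_state=0)
forest = RandomForestRegressor(n_estimators=50, random_state=0).fit(X, y)

train_leaves = forest.apply(X)       # (n_train, n_trees) leaf ids
test_leaves = forest.apply(X[:5])    # (n_test, n_trees)

quantiles = []
for row in test_leaves:
    # pool the training targets that land in the same leaf, tree by tree
    pool = np.concatenate([y[train_leaves[:, t] == leaf]
                           for t, leaf in enumerate(row)])
    quantiles.append(np.quantile(pool, [0.1, 0.5, 0.9]))
print(np.array(quantiles))           # (5, 3): lower / median / upper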
Example #10
def RandomForest_regression(self):
    model = RFR(n_estimators=1000, max_depth=10)
    model.fit(self.train_X, self.train_y)
    path = model.decision_path(self.train_X)
    self.y_pre_train = model.predict(self.train_X)
    self.y_pre_valid = model.predict(self.valid_X)
Example #11
# coding:utf-8

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
'''
Prediction with the random forest regression algorithm.
'''

X, Y = make_regression(n_features=4,
                       n_informative=2,
                       random_state=0,
                       shuffle=False)
regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X, Y)
# Default parameters used:
# RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
#                       max_features='auto', max_leaf_nodes=None,
#                       min_impurity_decrease=0.0, min_impurity_split=None,
#                       min_samples_leaf=1, min_samples_split=2,
#                       min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
#                       oob_score=False, random_state=0, verbose=0, warm_start=False)

print(regr.feature_importances_)

print(regr.predict([[0, 0, 0, 0], [1, 1, 1, 1]]))
print(type(regr.decision_path(X)[1]))
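# decision_path on a forest returns a pair; building on regr and X above,
# a quick look at both parts:
indicator, n_nodes_ptr = regr.decision_path(X)
print(indicator.shape)   # (n_samples, total number of nodes across all trees)
print(n_nodes_ptr)       # tree i owns columns n_nodes_ptr[i]:n_nodes_ptr[i + 1]
first_tree = indicator[:, n_nodes_ptr[0]:n_nodes_ptr[1]]
print(first_tree.toarray()[0])  # node-indicator row for sample 0 in tree 0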
Example #12
num_features = regr.n_features_  # the number of features

num_outputs = regr.n_outputs_  # the number of outputs when the model is built

#oob_score = regr.oob_score_  # score of the training dataset using an out-of-bag estimator;
# for a regressor this is the coefficient of determination R**2 computed from
# 'unseen' data not used to build each tree

#oob_predict = regr.oob_prediction_  # the prediction for the training dataset using the oob method

# now having a look at the methods
leaf_indices = regr.apply(
    x_test
)  # get the indices of the leaves each test sample ends up in

decision_path = regr.decision_path(x_test)

parameters = regr.get_params()  # the parameters of the model

predicted_age_array = regr.predict(
    x_test
)  # running the test dataset through the model, giving an array of predicted values

r_2_train = regr.score(
    x_train, y_train)  # calculating the R squared of the train dataset
r_2_test = regr.score(x_test,
                      y_test)  # calculating the R squared of the test dataset

set_params = regr.set_params()  # with no arguments this just returns the estimator unchanged

# print the R squared
Example #13
              "node %s."
              % (node_depth[i] * "\t",
                 i,
                 children_left[i],
                 feature[i],
                 threshold[i],
                 children_right[i],
                 ))
print()

# First let's retrieve the decision path of each sample. The decision_path
# method allows us to retrieve the node indicator functions. A non-zero element
# of the indicator matrix at position (i, j) indicates that sample i goes
# through node j.

node_indicator = estimator.decision_path(X_test)

# Similarly, we can also get the ids of the leaves reached by each sample.

leave_id = estimator.apply(X_test)

# Now, it's possible to get the tests that were used to predict a sample or
# a group of samples. First, let's do it for a single sample.

sample_id = 0
node_index = node_indicator.indices[node_indicator.indptr[sample_id]:
                                    node_indicator.indptr[sample_id + 1]]

print('Rules used to predict sample %s: ' % sample_id)
for node_id in node_index:
    if leave_id[sample_id] == node_id:
        continue
Example #14
def RandomForest_regression(self):
    rfr = RFR(n_estimators=1000, max_depth=4)
    rfr.fit(self.train_X, self.train_y)
    path = rfr.decision_path(self.train_X)
    self.y_pre_train = rfr.predict(self.train_X)
    self.y_pre_test = rfr.predict(self.test_X)