Python ranger Examples

Programming Language: Python

Namespace/Package Name: skranger.ensemble.ranger

Method/Function: ranger

Examples at hotexamples.com: 5

Python ranger - 5 examples found. These are the top-rated real-world Python examples of skranger.ensemble.ranger.ranger, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

    def predict_proba(self, X):
        """Return class probability estimates for the rows of X.

        The stored forest (``self.ranger_forest_["forest"]``) is passed back
        to ``ranger.ranger`` with prediction mode switched on, and the columns
        of the result are selected with ``self.ranger_class_order_``.

        :param array2d X: prediction input features
        :returns: 2d array of predictions whose columns are taken in
            ``self.ranger_class_order_`` order
        """
        check_is_fitted(self)
        X = check_array(X)
        self._check_n_features(X, reset=False)

        # Values that are needed both as an argument and as its "use_*" flag.
        features = np.asfortranarray(X.astype("float64"))
        split_weights = self.split_select_weights or []
        inbag = self.inbag or []

        # The call is purely positional, so the order below must not change.
        result = ranger.ranger(
            self.tree_type_,
            features,
            np.asfortranarray([[]]),  # empty 2d array, nothing to fit against
            self.feature_names_,  # variable_names
            self.mtry_,
            self.n_estimators,  # num_trees
            self.verbose,
            self.seed,
            self.n_jobs_,  # num_threads
            False,  # write_forest
            self.importance_mode_,
            self.min_node_size,
            split_weights,
            bool(split_weights),  # use_split_select_weights
            [],  # always_split_variable_names
            False,  # use_always_split_variable_names
            True,  # prediction_mode
            self.ranger_forest_["forest"],  # loaded_forest
            np.asfortranarray([[]]),  # snp_data
            self.replace,  # sample_with_replacement
            False,  # probability
            self.categorical_features_,  # unordered_feature_names
            bool(self.categorical_features_),  # use_unordered_features
            self.save_memory,
            self.split_rule_,
            [],  # case_weights
            False,  # use_case_weights
            self.class_weights or [],
            False,  # predict_all
            self.keep_inbag,
            [1],  # sample_fraction
            0.5,  # alpha
            0.1,  # minprop
            self.holdout,
            1,  # prediction_type
            self.num_random_splits,
            False,  # use_sparse_data
            self.order_snps_,
            self.oob_error,
            self.max_depth,
            inbag,
            bool(inbag),  # use_inbag
            self.regularization_factor_,
            self.use_regularization_factor_,
            self.regularization_usedepth,
        )

        # Guarantee a 2d result before picking columns in the stored order.
        probabilities = np.atleast_2d(np.array(result["predictions"]))
        return probabilities[:, self.ranger_class_order_]

Example #2

Show file

    def _get_terminal_node_forest(self, X):
        """Run the fitted forest over X asking for terminal nodes.

        The already fitted forest is loaded in prediction mode with
        ``prediction_type`` set to 2 (terminal nodes). Most of the remaining
        positional arguments are not used for this call and are filled with
        fixed placeholder values.

        :param array2d X: prediction input features
        :returns: whatever ``ranger.ranger`` returns for this call
        """
        features = np.asfortranarray(X.astype("float64"))

        # Positional call: keep the argument order exactly as listed.
        return ranger.ranger(
            self.tree_type_,
            features,
            np.asfortranarray([[]]),  # empty 2d array, no targets supplied
            self.feature_names_,  # variable_names
            0,  # m_try (placeholder)
            self.n_estimators,  # num_trees
            self.verbose,
            self.seed,
            self.n_jobs_,  # num_threads
            False,  # write_forest
            0,  # importance_mode (placeholder)
            0,  # min_node_size (placeholder)
            [],  # split_select_weights (placeholder)
            False,  # use_split_select_weights
            [],  # always_split_feature_names (placeholder)
            False,  # use_always_split_feature_names
            True,  # prediction_mode
            self.ranger_forest_["forest"],  # loaded_forest
            np.asfortranarray([[]]),  # snp_data
            True,  # sample_with_replacement
            False,  # probability
            [],  # unordered_feature_names (placeholder)
            False,  # use_unordered_features
            False,  # save_memory
            1,  # split_rule (placeholder)
            [],  # case_weights (placeholder)
            False,  # use_case_weights
            [],  # class_weights (placeholder)
            False,  # predict_all
            self.keep_inbag,
            [1],  # sample_fraction
            0,  # alpha (placeholder)
            0,  # minprop (placeholder)
            self.holdout,
            2,  # prediction_type (terminal nodes)
            1,  # num_random_splits (placeholder)
            False,  # use_sparse_data
            False,  # order_snps_
            False,  # oob_error
            0,  # max_depth (placeholder)
            [],  # inbag (placeholder)
            False,  # use_inbag
            [],  # regularization_factor_ (placeholder)
            False,  # use_regularization_factor_
            False,  # regularization_usedepth
        )

Example #3

Show file

    def fit(self, X, y, sample_weight=None):
        """Fit the ranger random forest using training data.

        Sets ``tree_type_``, ``feature_names_`` and ``ranger_forest_`` on the
        estimator. When ``self.quantiles`` is truthy it additionally builds
        ``random_node_values_``, a ``(max terminal node id + 1, n_estimators)``
        table holding one training target per (terminal node, tree) pair and
        ``nan`` where no training sample landed.

        :param array2d X: training input features
        :param array1d y: training input targets
        :param array1d sample_weight: optional weights for input samples
        :returns: the fitted estimator (``self``)
        """
        self.tree_type_ = 3  # tree_type, TREE_REGRESSION

        # Check input
        X, y = self._validate_data(X, y)

        # Check the init parameters
        self._validate_parameters(X, y, sample_weight)

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X)
            use_sample_weight = True
            # ranger does additional rng on samples if weights are passed.
            # if the weights are ones, then we dont want that extra rng.
            if np.array_equal(np.unique(sample_weight), np.array([1.0])):
                sample_weight = []
                use_sample_weight = False
        else:
            sample_weight = []
            use_sample_weight = False

        # Set X info: features are named by their column index, as bytes.
        self.feature_names_ = [str(c).encode() for c in range(X.shape[1])]
        self._check_n_features(X, reset=True)

        if self.always_split_features is not None:
            always_split_features = [
                str(c).encode() for c in self.always_split_features
            ]
        else:
            always_split_features = []

        # Fit the forest (purely positional call; the order must not change).
        self.ranger_forest_ = ranger.ranger(
            self.tree_type_,
            np.asfortranarray(X.astype("float64")),
            np.asfortranarray(np.atleast_2d(y).astype("float64").transpose()),
            self.feature_names_,  # variable_names
            self.mtry_,
            self.n_estimators,  # num_trees
            self.verbose,
            self.seed,
            self.n_jobs_,  # num_threads
            True,  # write_forest
            self.importance_mode_,
            self.min_node_size,
            self.split_select_weights or [],
            bool(self.split_select_weights),  # use_split_select_weights
            always_split_features,  # always_split_feature_names
            bool(always_split_features),  # use_always_split_feature_names
            False,  # prediction_mode
            {},  # loaded_forest
            np.asfortranarray([[]]),  # snp_data
            self.replace,  # sample_with_replacement
            False,  # probability
            self.categorical_features_,  # unordered_feature_names
            bool(self.categorical_features_),  # use_unordered_features
            self.save_memory,
            self.split_rule_,
            sample_weight,  # case_weights
            use_sample_weight,  # use_case_weights
            [],  # class_weights
            False,  # predict_all
            self.keep_inbag,
            self.sample_fraction_,
            self.alpha,
            self.minprop,
            self.holdout,
            1,  # prediction_type
            self.num_random_splits,
            False,  # use_sparse_data
            self.order_snps_,
            self.oob_error,
            self.max_depth,
            self.inbag or [],
            bool(self.inbag),  # use_inbag
            self.regularization_factor_,
            False,  # use_regularization_factor
            self.regularization_usedepth,
        )

        if self.quantiles:
            forest = self._get_terminal_node_forest(X)
            # presumably shaped (n_samples, n_estimators); it is indexed as
            # [sample, tree] below -- TODO confirm against ranger's output
            terminal_nodes = np.array(forest["predictions"]).astype(int)
            self.random_node_values_ = np.full(
                (np.max(terminal_nodes) + 1, self.n_estimators), np.nan)

            # Use a private generator seeded from self.seed so that the table
            # is reproducible between fits and the global numpy RNG state is
            # left untouched.
            # NOTE(review): assumes self.seed is None or an int accepted by
            # RandomState (0 .. 2**32 - 1) -- confirm against the constructor.
            rng = np.random.RandomState(self.seed)
            n_samples = X.shape[0]
            for tree in range(self.n_estimators):
                # Several samples can share a terminal node; with a shuffled
                # order the last one written wins, which picks one of them
                # at random.
                idx = rng.permutation(n_samples)
                self.random_node_values_[terminal_nodes[idx, tree],
                                         tree] = y[idx]

        return self

Example #4

Show file

File: ranger_forest_survival.py Project: kmacdon/skranger

    def fit(self, X, y, sample_weight=None):
        """Train the ranger survival forest.

        Sets ``tree_type_``, ``feature_names_``, ``ranger_forest_``,
        ``event_times_`` and ``cumulative_hazard_function_`` on the estimator.

        :param array2d X: training input features
        :param array2d y: training input targets, rows of (bool, float)
            representing (survival, time)
        :param array1d sample_weight: optional weights for input samples
        :returns: the fitted estimator (``self``)
        """
        self.tree_type_ = 5  # tree_type, TREE_SURVIVAL

        # Validate the feature matrix
        X = check_array(X)

        # y arrives as a 1d array of 2-tuples in the scikit-survival order
        # (status, time). ranger wants time first and status second, so the
        # tuples are expanded into a 2d array and the columns are mirrored.
        y_pairs = np.array(y.tolist())
        y = np.fliplr(y_pairs)

        # Validate the constructor parameters
        self._validate_parameters(X, y, sample_weight)

        # Default to "no case weights"; only switch them on for weights that
        # are not all ones, because ranger does additional rng on samples
        # whenever weights are passed and that is unwanted for unit weights.
        case_weights, use_case_weights = [], False
        if sample_weight is not None:
            checked_weights = _check_sample_weight(sample_weight, X)
            all_ones = np.array_equal(
                np.unique(checked_weights), np.array([1.0]))
            if not all_ones:
                case_weights, use_case_weights = checked_weights, True

        # Features are named by their column index, encoded as bytes
        n_columns = X.shape[1]
        self.feature_names_ = [str(col).encode() for col in range(n_columns)]
        self._check_n_features(X, reset=True)

        forced_split_names = (
            [str(col).encode() for col in self.always_split_features]
            if self.always_split_features is not None
            else []
        )

        x_fortran = np.asfortranarray(X.astype("float64"))
        y_fortran = np.asfortranarray(y.astype("float64"))
        split_weights = self.split_select_weights or []
        inbag = self.inbag or []

        # Grow the forest (purely positional call; the order must not change)
        self.ranger_forest_ = ranger.ranger(
            self.tree_type_,
            x_fortran,
            y_fortran,
            self.feature_names_,  # variable_names
            self.mtry_,
            self.n_estimators,  # num_trees
            self.verbose,
            self.seed,
            self.n_jobs_,  # num_threads
            True,  # write_forest
            self.importance_mode_,
            self.min_node_size,
            split_weights,
            bool(split_weights),  # use_split_select_weights
            forced_split_names,  # always_split_variable_names
            bool(forced_split_names),  # use_always_split_variable_names
            False,  # prediction_mode
            {},  # loaded_forest
            np.asfortranarray([[]]),  # snp_data
            self.replace,  # sample_with_replacement
            False,  # probability
            self.categorical_features_,  # unordered_feature_names
            bool(self.categorical_features_),  # use_unordered_features
            False,  # save_memory
            self.split_rule_,
            case_weights,  # case_weights
            use_case_weights,  # use_case_weights
            [],  # class_weights
            False,  # predict_all
            self.keep_inbag,
            self.sample_fraction_,
            self.alpha,
            self.minprop,
            self.holdout,
            1,  # prediction_type
            self.num_random_splits,
            False,  # use_sparse_data
            self.order_snps_,
            self.oob_error,
            self.max_depth,
            inbag,
            bool(inbag),  # use_inbag
            self.regularization_factor_,
            False,  # use_regularization_factor
            self.regularization_usedepth,
        )

        fitted = self.ranger_forest_["forest"]
        self.event_times_ = np.array(fitted["unique_death_times"])
        # dtype=object suppresses the warning about ragged nested sequences
        self.cumulative_hazard_function_ = np.array(
            fitted["cumulative_hazard_function"], dtype=object)
        return self

Example #5

Show file

    def fit(self, X, y, sample_weight=None):
        """Train the ranger classification forest.

        Sets ``tree_type_``, ``classes_``, ``n_classes_``, ``feature_names_``,
        ``ranger_forest_`` and ``ranger_class_order_`` on the estimator.

        :param array2d X: training input features
        :param array1d y: training input target classes
        :param array1d sample_weight: optional weights for input samples
        :returns: the fitted estimator (``self``)
        """
        self.tree_type_ = 9  # tree_type, TREE_PROBABILITY enables predict_proba

        # Validate the inputs
        X, y = self._validate_data(X, y)
        check_classification_targets(y)

        # Validate the constructor parameters
        self._validate_parameters(X, y, sample_weight)

        # Default to "no case weights"; only switch them on for weights that
        # are not all ones, because ranger does additional rng on samples
        # whenever weights are passed and that is unwanted for unit weights.
        case_weights, use_case_weights = [], False
        if sample_weight is not None:
            checked_weights = _check_sample_weight(sample_weight, X)
            all_ones = np.array_equal(
                np.unique(checked_weights), np.array([1.0]))
            if not all_ones:
                case_weights, use_case_weights = checked_weights, True

        # Replace class labels by their index into the sorted unique labels
        self.classes_, y = np.unique(np.copy(y), return_inverse=True)
        self.n_classes_ = len(self.classes_)

        # Features are named by their column index, encoded as bytes
        n_columns = X.shape[1]
        self.feature_names_ = [str(col).encode() for col in range(n_columns)]
        self._check_n_features(X, reset=True)

        forced_split_names = (
            [str(col).encode() for col in self.always_split_features]
            if self.always_split_features is not None
            else []
        )

        x_fortran = np.asfortranarray(X.astype("float64"))
        # targets become a single float64 column
        y_fortran = np.asfortranarray(
            np.atleast_2d(y).astype("float64").transpose())
        split_weights = self.split_select_weights or []
        inbag = self.inbag or []

        # Grow the forest (purely positional call; the order must not change)
        self.ranger_forest_ = ranger.ranger(
            self.tree_type_,
            x_fortran,
            y_fortran,
            self.feature_names_,  # variable_names
            self.mtry_,
            self.n_estimators,  # num_trees
            self.verbose,
            self.seed,
            self.n_jobs_,  # num_threads
            True,  # write_forest
            self.importance_mode_,
            self.min_node_size,
            split_weights,
            bool(split_weights),  # use_split_select_weights
            forced_split_names,  # always_split_variable_names
            bool(forced_split_names),  # use_always_split_variable_names
            False,  # prediction_mode
            {},  # loaded_forest
            np.asfortranarray([[]]),  # snp_data
            self.replace,  # sample_with_replacement
            False,  # probability
            self.categorical_features_,  # unordered_variable_names
            bool(self.categorical_features_),  # use_unordered_variable_names
            self.save_memory,
            self.split_rule_,
            case_weights,  # case_weights
            use_case_weights,  # use_case_weights
            self.class_weights or [],
            False,  # predict_all
            self.keep_inbag,
            self.sample_fraction_,
            0.5,  # alpha, ignored because maxstat can't be used on classification
            0.1,  # minprop, ignored because maxstat can't be used on classification
            self.holdout,
            1,  # prediction_type
            self.num_random_splits,
            False,  # use_sparse_data
            self.order_snps_,
            self.oob_error,
            self.max_depth,
            inbag,
            bool(inbag),  # use_inbag
            self.regularization_factor_,
            False,  # use_regularization_factor
            self.regularization_usedepth,
        )

        # Remember the ordering that sorts the forest's class values
        class_values = np.array(self.ranger_forest_["forest"]["class_values"])
        self.ranger_class_order_ = np.argsort(class_values.astype(int))
        return self