예제 #1
0
 def test_count_nodes(self):
     """
     Check the tree and node counts returned by ProxyXGBoost._count_nodes.

     The counts obtained for ``model_file`` must be 2 trees and 14 nodes,
     and they must match the sizes of the ``trees_root`` and
     ``trees_nodes_value`` arrays held by ``self.model``.
     """
     tree_count, node_count = ProxyXGBoost._count_nodes(model_file)

     # Absolute expectations for the model stored in model_file.
     assert_equal(tree_count, 2)
     assert_equal(node_count, 14)

     # The same counts must agree with the arrays of self.model.
     assert_equal(tree_count, self.model.trees_root.size)
     assert_equal(node_count, self.model.trees_nodes_value.size)
예제 #2
0
    def save(self, f, format="QuickRank"):
        """
        Write this model to disk through the proxy matching ``format``.

        The proxy class is imported lazily, only for the requested format,
        and its ``save`` method receives the destination path and this model.

        Parameters
        ----------
        f : str
            The path of the file where the model has to be saved
        format : str
            The format to use for saving the model. One of 'QuickRank',
            'LightGBM', 'XGBoost' or 'ScikitLearn'.

        Returns
        -------
        status : bool
            The value returned by the selected proxy's ``save`` (documented
            as true if the save is successful, false otherwise)

        Raises
        ------
        TypeError
            If ``format`` is not one of the supported model formats.
        """
        # Select the proxy first; a single save call is issued afterwards.
        if format == "QuickRank":
            from rankeval.model import ProxyQuickRank as proxy
        elif format == "LightGBM":
            from rankeval.model import ProxyLightGBM as proxy
        elif format == "XGBoost":
            from rankeval.model import ProxyXGBoost as proxy
        elif format == "ScikitLearn":
            from rankeval.model import ProxyScikitLearn as proxy
        else:
            raise TypeError("Model format %s not yet supported!" % format)

        return proxy.save(f, self)
예제 #3
0
    def __init__(self,
                 file_path,
                 name=None,
                 format="QuickRank",
                 base_score=None,
                 learning_rate=1,
                 n_trees=None):
        """
        Build the ensemble by loading the model stored in file_path with the
        proxy matching the given format.

        All the model attributes are first reset to None, then the selected
        proxy is asked to load the file into this object. If n_trees is
        given and is smaller than the number of trees of the loaded model,
        the model is pruned through ``_prune_model``.

        Parameters
        ----------
        file_path : str
            The path of the file where the model has been saved
        name : None or str
            The name to be given to the current model. If None, the name is
            "RTEnsemble: " followed by file_path.
        format : ['QuickRank', 'ScikitLearn', 'XGBoost', 'LightGBM']
            The format of the model to load.
        base_score : None or float
            The initial prediction score of all instances, global bias.
            If None and the format is 'XGBoost', it is set to 0.5. For the
            other formats a None value is stored as is by this method
            (NOTE(review): the previous documentation stated a default of
            0.0 for them, presumably applied by the loaders - confirm).
        learning_rate : None or float
            The learning rate used by the model to shrink the contribution
            of each tree. By default it is set to 1 (no shrinking at all).
        n_trees : None or int
            The maximum number of trees to load from the model. By default
            it is set to None, meaning the method will load all the trees.

        Attributes
        ----------
        file : str
            The path of the file where the model has been saved
        name : str
            The name given to the current model
        n_trees : integer
            The number of regression trees in the ensemble.
        n_nodes : integer
            The total number of nodes (splitting nodes and leaves) in the
            ensemble
        trees_root : list of integers
            Numpy array modelling the indexes of the root nodes of the
            regression trees composing the ensemble. The indexes refer to
            the following data structures:
            * trees_left_child
            * trees_right_child
            * trees_nodes_value
            * trees_nodes_feature
        trees_weight : list of floats
            Numpy array modelling the weights of the regression trees
            composing the ensemble.
        trees_left_child : list of integers
            Numpy array modelling the structure (shape) of the regression
            trees, considering only the left children. Given a node of a
            regression tree (a single cell in this array), the value
            identifies the index of its left child. If the node is a leaf,
            the child assumes the value -1.
        trees_right_child : list of integers
            Numpy array modelling the structure (shape) of the regression
            trees, considering only the right children. Given a node of a
            regression tree (a single cell in this array), the value
            identifies the index of its right child. If the node is a leaf,
            the child assumes the value -1.
        trees_nodes_value : numpy array
            Numpy array modelling either the output of a leaf node (when the
            node is a leaf) or the splitting value of the node in the
            regression trees (with respect to the feature identified by the
            trees_nodes_feature data structure).
            NOTE(review): the previous documentation referred to a
            "trees_structure" data structure to tell leaves apart; no such
            attribute is set here - presumably the child arrays are meant.
        trees_nodes_feature : list of integers
            Numpy array modelling the feature-id used by the selected
            splitting node (or -1 if the node is a leaf).

        Raises
        ------
        TypeError
            If ``format`` is not one of the supported model formats.
        """
        self.file = file_path
        # The default name is always computed, and replaced only when the
        # caller provides an explicit one.
        default_name = "RTEnsemble: " + file_path
        self.name = default_name if name is None else name
        self.learning_rate = learning_rate

        # XGBoost is the only format given an explicit default bias here.
        if base_score is None and format == "XGBoost":
            base_score = 0.5
        self.base_score = base_score

        # Placeholders for the model description; the proxy invoked below
        # receives this object and is expected to populate them.
        for attribute in ("n_trees",
                          "n_nodes",
                          "trees_root",
                          "trees_weight",
                          "trees_left_child",
                          "trees_right_child",
                          "trees_nodes_value",
                          "trees_nodes_feature"):
            setattr(self, attribute, None)

        self._cache_scorer = {}

        # Lazily import only the proxy required by the requested format.
        if format == "QuickRank":
            from rankeval.model import ProxyQuickRank as proxy
        elif format == "LightGBM":
            from rankeval.model import ProxyLightGBM as proxy
        elif format == "XGBoost":
            from rankeval.model import ProxyXGBoost as proxy
        elif format == "ScikitLearn":
            from rankeval.model import ProxyScikitLearn as proxy
        else:
            raise TypeError("Model format %s not yet supported!" % format)

        proxy.load(file_path, self)

        # Keep only the first n_trees trees when a smaller limit is given.
        if n_trees is None:
            return
        if n_trees < self.n_trees:
            self._prune_model(n_trees)