コード例 #1
0
 def test_list_to_discrete_rv(self):
     """Check list_to_discrete_rv against hand-computed expectations.

     Each case pairs an input sample with the expected ``(values,
     probabilities)`` pair; the first two elements of the actual result
     are compared against it.
     """
     cases = [
         ([0, 2, 2, 3], ([0, 2, 3], [0.25, 0.5, 0.25])),
     ]
     for arg, (values_expected, probabilities_expected) in cases:
         result_actual = list_to_discrete_rv(np.array(arg))
         # assert_array_equal raises on mismatched array shapes, whereas
         # np.all(a == b) could broadcast or collapse to a single bool.
         np.testing.assert_array_equal(result_actual[0], values_expected)
         # Probabilities are floats: compare with a tolerance, not with ==.
         np.testing.assert_allclose(result_actual[1], probabilities_expected)
コード例 #2
0
ファイル: test_metrics.py プロジェクト: dmitru/pines
 def test_list_to_discrete_rv(self):
     """Verify the values and probabilities produced by list_to_discrete_rv.

     ``cases`` holds ``(input, (expected_values, expected_probabilities))``
     tuples. Element 0 of the result is checked against the expected
     values and element 1 against the expected probabilities.
     """
     cases = [
         ([0, 2, 2, 3], ([0, 2, 3], [0.25, 0.5, 0.25])),
     ]
     for arg, (values_expected, probabilities_expected) in cases:
         result_actual = list_to_discrete_rv(np.array(arg))
         # Exact comparison for the values; unlike np.all(a == b) this
         # fails loudly when the two arrays have different shapes.
         np.testing.assert_array_equal(result_actual[0], values_expected)
         # Tolerant comparison for the floating-point probabilities.
         np.testing.assert_allclose(result_actual[1], probabilities_expected)
コード例 #3
0
    def _build_tree_recursive_oblivious(self, tree, cur_level):
        """Process one level of the tree in oblivious mode, then recurse.

        All nodes at ``cur_level`` are handled together. The level ends up
        in one of three states:

        * handed over, node by node, to ``_build_tree_recursive_cart`` when
          ``self.switch_criterion.should_switch(self)`` is true;
        * turned into leaves when the depth limit is reached or
          ``find_best_layer_split`` returns ``None``;
        * split via ``apply_node_split``, after which the next level is
          processed recursively.

        Args:
            tree: Tree under construction. Its ``_leaf_n_samples``,
                ``_leaf_values`` and ``_leaf_functions`` containers are
                written in place, indexed by node id.
            cur_level: Level whose nodes are processed by this call.

        Raises:
            ValueError: If a classification leaf is created and
                ``self.leaf_prediction_rule`` is neither ``'majority'`` nor
                ``'distribution'``.
        """
        nodes_on_current_level = tree.nodes_at_level(cur_level, kind='all')

        # Record how many samples reach every node of this level.
        for node_id in nodes_on_current_level:
            _, y = self._data_per_node[node_id]
            tree._leaf_n_samples[node_id] = len(y)

        leaves_reached = False
        if self.max_depth is not None and cur_level >= self.max_depth:
            if TreeBuilderObliviousCart.debug:
                TreeBuilderObliviousCart.logger.debug('Max depth reached at level {}'.format(cur_level))
            leaves_reached = True

        # NOTE(review): the split search and the _no_split_found flag are
        # deliberately kept ahead of the switch check; should_switch receives
        # ``self`` and presumably inspects this state -- confirm before
        # reordering.
        best_layer_split = self.find_best_layer_split(nodes_on_current_level)

        if best_layer_split is None:
            if TreeBuilderObliviousCart.debug:
                TreeBuilderObliviousCart.logger.debug('No split found at level {}'.format(cur_level))
            leaves_reached = True
            self._no_split_found = True

        if self.switch_criterion.should_switch(self):
            if TreeBuilderObliviousCart.debug:
                TreeBuilderObliviousCart.logger.debug('Switching to CART tree at level {}'.format(cur_level))
            self.mode = TreeType.CART
            # From here on every node of the level grows its own CART subtree.
            for node in nodes_on_current_level:
                X, y = self._data_per_node[node]
                self._build_tree_recursive_cart(tree, node, X, y)
            return

        if leaves_reached:
            # Nodes on this level will not be split: they become leaves of
            # the final tree and receive their prediction here.
            for node_id in nodes_on_current_level:
                _, y = self._data_per_node[node_id]
                if self.is_regression:
                    tree._leaf_values[node_id] = np.mean(y)
                elif self.leaf_prediction_rule == 'majority':
                    # scipy.stats.mode returns a length-1 array on older
                    # SciPy and a scalar on SciPy >= 1.11 for 1-D input;
                    # atleast_1d makes the [0] lookup valid for both.
                    tree._leaf_values[node_id] = np.atleast_1d(scipy.stats.mode(y).mode)[0]
                elif self.leaf_prediction_rule == 'distribution':
                    values, probabilities = list_to_discrete_rv(y)
                    distribution = scipy.stats.rv_discrete(values=(values, probabilities))
                    # The leaf stores a sampler together with the
                    # distribution it draws from.
                    func = lambda d: d.rvs()
                    tree._leaf_functions[node_id] = (func, distribution)
                else:
                    raise ValueError('Invalid value for leaf_prediction_rule: {}'.format(self.leaf_prediction_rule))
            return

        for _, node_split in zip(nodes_on_current_level, best_layer_split):
            self.apply_node_split(tree, node_split)

        # Reaching this point means leaves_reached is False, i.e. either
        # max_depth is None (no limit) or cur_level < max_depth. The previous
        # guard required max_depth to be set, so an unlimited tree stopped
        # after a single layer and its children never received leaf values.
        self._build_tree_recursive_oblivious(tree, cur_level + 1)
コード例 #4
0
    def _build_tree_recursive_cart(self, tree, cur_node, X, y):
        """Recursively grow the CART subtree rooted at ``cur_node``.

        The node becomes a leaf when it holds at most
        ``self.min_samples_per_leaf`` samples, when its depth has reached
        ``self.max_depth`` (if set), or when ``find_best_split`` returns
        ``None``. Otherwise it is split and both children are processed
        recursively on their share of the data.

        Args:
            tree: Tree under construction. Its ``_leaf_n_samples``,
                ``_leaf_values`` and ``_leaf_functions`` containers are
                written in place, indexed by node id.
            cur_node: Id of the node to process.
            X: Two-dimensional feature array of the samples reaching the
                node (``X.shape`` is unpacked into two values).
            y: Targets of the samples reaching the node.

        Raises:
            ValueError: If a classification leaf is created and
                ``self.leaf_prediction_rule`` is neither ``'majority'`` nor
                ``'distribution'``.
        """
        n_samples, _ = X.shape
        if n_samples < 1:
            # No samples reach this node: nothing is recorded for it.
            return

        leaf_reached = False
        if n_samples <= self.min_samples_per_leaf:
            leaf_reached = True
        depth = tree.depth(cur_node)
        if self.max_depth is not None and depth >= self.max_depth:
            leaf_reached = True

        best_split = None
        if not leaf_reached:
            if TreeBuilderObliviousCart.debug:
                TreeBuilderObliviousCart.logger.debug('Split at node {}, n = {}'.format(cur_node, n_samples))

            best_split = self.find_best_split(X, y)
            if best_split is None:
                if TreeBuilderObliviousCart.debug:
                    TreeBuilderObliviousCart.logger.debug('No split found for at node {}'.format(cur_node))
                leaf_reached = True

        # The sample count is stored for every visited node, leaf or not.
        tree._leaf_n_samples[cur_node] = len(y)

        if leaf_reached:
            if self.is_regression:
                tree._leaf_values[cur_node] = np.mean(y)
            elif self.leaf_prediction_rule == 'majority':
                # scipy.stats.mode returns a length-1 array on older SciPy
                # and a scalar on SciPy >= 1.11 for 1-D input; atleast_1d
                # makes the [0] lookup valid for both.
                tree._leaf_values[cur_node] = np.atleast_1d(scipy.stats.mode(y).mode)[0]
            elif self.leaf_prediction_rule == 'distribution':
                values, probabilities = list_to_discrete_rv(y)
                distribution = scipy.stats.rv_discrete(values=(values, probabilities))
                # The leaf stores a sampler together with the distribution
                # it draws from.
                func = lambda d: d.rvs()
                tree._leaf_functions[cur_node] = (func, distribution)
            else:
                raise ValueError('Invalid value for leaf_prediction_rule: {}'.format(self.leaf_prediction_rule))
            return

        tree.split_node(cur_node, best_split)

        left_child = tree.left_child(cur_node)
        right_child = tree.right_child(cur_node)
        X_left, X_right, y_left, y_right = split_dataset(
                X, y, best_split.feature_id, best_split.value)
        self._build_tree_recursive_cart(tree, left_child, X_left, y_left)
        self._build_tree_recursive_cart(tree, right_child, X_right, y_right)