Example #1
0
    def __print_performance(self, iter, X, y, unique_labels,
                            probabilistic_leaf):
        """Print a diagnostic report about the tree for one iteration.

        The report contains the expected GINI over the leaves, node and leaf
        counts, the min/max leaf depth, the distribution of predictions and
        the training accuracy.

        ``__assign_leaves_classes`` is invoked first with the given data
        (presumably so that ``predict`` reflects the current tree -- confirm
        against that method).

        Args:
            iter: Iteration number echoed in the report. The name shadows the
                builtin ``iter``; it is kept so existing callers keep working.
            X: Samples; ``X.shape[0]`` is used as the number of rows.
            y: Labels; compared element-wise with ``self.predict(X)`` and
                ``y.shape[0]`` is used as the accuracy denominator.
            unique_labels: Forwarded unchanged to ``__assign_leaves_classes``.
            probabilistic_leaf: Forwarded unchanged to
                ``__assign_leaves_classes``.
        """
        self.__assign_leaves_classes(X, y, unique_labels, probabilistic_leaf)
        print("iter: ", iter)
        p_dict = global_impurity_tree_math2.calc_p_dict(self.__head, X)
        leaves = self.__head._get_leaves()

        # With no leaves both depths stay None, as in the original scan.
        depths = [leaf._depth() for leaf in leaves]
        min_leaf_depth = min(depths) if depths else None
        max_leaf_depth = max(depths) if depths else None

        # One column per leaf, filled from the per-leaf entry of p_dict.
        subset_assign_probs = np.zeros((X.shape[0], len(leaves)))
        for column, leaf in enumerate(leaves):
            subset_assign_probs[:, column] = p_dict[leaf]
        print("- Expected GINI: ",
              impurity.expected_gini(subset_assign_probs, y))
        print("- # nodes: ", len(self.__head._to_list()))
        # Reuse the leaves collected above instead of walking the tree again.
        print("- # leaves: ", len(leaves))
        print("- Min/max leaf depth: ", (min_leaf_depth, max_leaf_depth))
        predictions = self.predict(X)
        _, counts = np.unique(predictions, return_counts=True)
        print("- Prediction distribution: ", counts)
        print("- Train accuracy: ",
              100.0 * np.sum(y == predictions) / float(y.shape[0]))
        print("------------------------------------------")
Example #2
0
 def __print_performance(self, X, y):
     """Print diagnostic statistics for the tree on the data ``(X, y)``.

     Reports how many samples land in each leaf, the leaf probabilities,
     the expected GINI, the GINI of the labels grouped by leaf and, when it
     can be computed, the accuracy of ``predict`` on ``X``.

     Args:
         X: Samples passed to ``_leaf_maxes``, ``_leaf_probs`` and
             ``predict``.
         y: Labels; indexed by leaf membership and compared element-wise
             with the predictions. ``y.shape[0]`` is the accuracy
             denominator.
     """
     # presumably the index of the dominant leaf per sample -- confirm
     # against _leaf_maxes.
     leaf_maxes = self._leaf_maxes(X)
     _, leaf_counts = np.unique(leaf_maxes, return_counts=True)
     print("falling leaves: ", leaf_counts)

     # Computed once and reused for both the printout and the GINI figure.
     leaf_probs = self._leaf_probs(X)
     print("leaf probs: ", leaf_probs)
     print("expected GINI: ", impurity.expected_gini(leaf_probs, y))

     # Group the labels by the leaf number recorded in leaf_maxes.
     num_leaves = len(self._head._get_leaves())
     y_in_leaves = [y[np.where(leaf_maxes == leaf_num)]
                    for leaf_num in range(num_leaves)]
     print("GINI: ", impurity.gini(y_in_leaves))

     try:
         self.__set_leaf_labels(X, y)
         predicts = self.predict(X)
         print("accuracy: ",
               100.0 * (np.sum(predicts == y)) / float(y.shape[0]))
     except Exception:
         # Best-effort: any ordinary failure while labelling/predicting is
         # reported instead of raised. KeyboardInterrupt and SystemExit are
         # deliberately no longer swallowed.
         print("tree not trained enough to evaluate accuracy")
     print("----------------------------------")
 def expected_gini(self, X, y):
     """Return the expected GINI of ``y`` under a two-way soft split.

     ``self.predict(X)`` supplies the first column (presumably a per-sample
     probability -- confirm against ``predict``) and ``1 - predict(X)`` the
     second; the transposed pair is handed to ``impurity.expected_gini``.
     """
     first_side = self.predict(X)
     second_side = 1 - first_side
     stacked = np.array([first_side, second_side])
     return impurity.expected_gini(stacked.T, y)
 def expected_gini(self, X, y):
     """Return ``impurity.expected_gini`` of ``self.predict(X)`` and ``y``.

     The output of ``predict`` is passed through unchanged as the first
     argument.
     """
     return impurity.expected_gini(self.predict(X), y)
Example #5
0
 def __expected_gini(self, X, y):
     """Return the expected GINI of ``y`` given this model's output on ``X``.

     Delegates to ``impurity.expected_gini`` with ``self.predict(X)`` as the
     first argument and ``y`` as the second.
     """
     predicted = self.predict(X)
     return impurity.expected_gini(predicted, y)
Example #6
0
 def __expected_GINI(self, leaves, p_arr, y):
     """Return the expected GINI of ``y`` over the given leaves.

     Builds a ``(y.shape[0], len(leaves))`` matrix whose column ``k`` is
     ``p_arr[leaves[k]._ID]`` and passes it, together with ``y``, to
     ``impurity.expected_gini``.

     Args:
         leaves: Sequence of leaf objects exposing an ``_ID`` attribute.
         p_arr: Container indexed by leaf ``_ID``; each entry must be
             assignable to one column of the matrix.
         y: Labels; ``y.shape[0]`` fixes the number of rows.
     """
     row_count = y.shape[0]
     assignment = np.zeros((row_count, len(leaves)))
     for column, leaf in enumerate(leaves):
         assignment[:, column] = p_arr[leaf._ID]
     return impurity.expected_gini(assignment, y)