def __print_performance(self, iter, X, y, unique_labels, probabilistic_leaf):
    """Print a diagnostic summary of the tree for one training iteration.

    Leaf classes are (re)assigned from the supplied data first; afterwards
    the iteration value, expected GINI, node and leaf counts, the leaf depth
    range, the prediction distribution and the training accuracy are
    written to stdout.

    Args:
        iter: Iteration identifier; it is only printed. The name shadows the
            builtin but is kept because it is part of the signature.
        X: Sample matrix; ``X.shape[0]`` is used as the number of rows.
        y: Targets, compared element-wise against ``self.predict(X)``.
        unique_labels: Forwarded unchanged to the leaf-class assignment.
        probabilistic_leaf: Forwarded unchanged to the leaf-class assignment.
    """
    self.__assign_leaves_classes(X, y, unique_labels, probabilistic_leaf)
    print("iter: ", iter)

    # The probability lookup is keyed by the leaf node objects themselves.
    probs_by_node = global_impurity_tree_math2.calc_p_dict(self.__head, X)
    leaves = self.__head._get_leaves()

    # One _depth() call per leaf; both extremes stay None without leaves.
    depths = [leaf._depth() for leaf in leaves]
    min_leaf_depth = min(depths, default=None)
    max_leaf_depth = max(depths, default=None)

    # One column per leaf, one row per sample.
    subset_assign_probs = np.zeros((X.shape[0], len(leaves)))
    for column, leaf in enumerate(leaves):
        subset_assign_probs[:, column] = probs_by_node[leaf]

    print("- Expected GINI: ", impurity.expected_gini(subset_assign_probs, y))
    print("- # nodes: ", len(self.__head._to_list()))
    print("- # leaves: ", len(self.__head._get_leaves()))
    print("- Min/max leaf depth: ", (min_leaf_depth, max_leaf_depth))

    predictions = self.predict(X)
    counts = np.unique(predictions, return_counts=True)[1]
    print("- Prediction distribution: ", counts)

    n_correct = np.sum(y == predictions)
    print("- Train accuracy: ", 100.0 * n_correct / float(y.shape[0]))
    print("------------------------------------------")
def __print_performance(self, X, y):
    """Print leaf-occupancy, impurity and accuracy diagnostics to stdout.

    Args:
        X: Samples passed to ``self._leaf_maxes``, ``self._leaf_probs`` and
            ``self.predict``.
        y: Targets; indexed per leaf and compared against the predictions.
            ``y.shape[0]`` is used as the sample count.

    The accuracy section is best-effort: if labelling the leaves or
    predicting raises an ordinary exception, a short notice is printed
    instead. Interpreter-level signals such as ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.
    """
    leaf_maxes = self._leaf_maxes(X)
    _, leaf_counts = np.unique(leaf_maxes, return_counts=True)
    print("falling leaves: ", leaf_counts)

    # Computed once and reused for both the raw dump and the expected GINI.
    leaf_probs = self._leaf_probs(X)
    print("leaf probs: ", leaf_probs)
    print("expected GINI: ", impurity.expected_gini(leaf_probs, y))

    # Group the targets by the leaf index recorded for each sample.
    n_leaves = len(self._head._get_leaves())
    y_in_leaves = [y[np.where(leaf_maxes == leaf_num)] for leaf_num in range(n_leaves)]
    print("GINI: ", impurity.gini(y_in_leaves))

    try:
        self.__set_leaf_labels(X, y)
        predicts = self.predict(X)
        print("accuracy: ", 100.0 * (np.sum(predicts == y)) / float(y.shape[0]))
    except Exception:
        # Deliberately tolerant: accuracy is optional diagnostic output.
        print("tree not trained enough to evaluate accuracy")
    print("----------------------------------")
def expected_gini(self, X, y):
    """Return the expected GINI of a two-way split defined by ``predict``.

    The output of ``self.predict(X)`` and its complement ``1 - output`` are
    stacked and transposed, then handed to ``impurity.expected_gini``
    together with ``y``.

    Args:
        X: Samples passed to ``self.predict``.
        y: Targets forwarded to ``impurity.expected_gini``.

    Returns:
        Whatever ``impurity.expected_gini`` returns for the stacked array.
    """
    first_side = self.predict(X)
    second_side = 1 - first_side
    subset_probs = np.transpose(np.array([first_side, second_side]))
    return impurity.expected_gini(subset_probs, y)
def expected_gini(self, X, y):
    """Return ``impurity.expected_gini`` of this model's predictions on X.

    Args:
        X: Samples passed to ``self.predict``.
        y: Targets forwarded to ``impurity.expected_gini``.

    Returns:
        The value produced by ``impurity.expected_gini``.
    """
    return impurity.expected_gini(self.predict(X), y)
def __expected_gini(self, X, y):
    """Return ``impurity.expected_gini`` of this model's predictions on X.

    Args:
        X: Samples passed to ``self.predict``.
        y: Targets forwarded to ``impurity.expected_gini``.

    Returns:
        The value produced by ``impurity.expected_gini``.
    """
    predicted = self.predict(X)
    return impurity.expected_gini(predicted, y)
def __expected_GINI(self, leaves, p_arr, y):
    """Return the expected GINI over the given leaves.

    A ``(y.shape[0], len(leaves))`` matrix is filled column by column with
    ``p_arr[leaf._ID]`` and passed to ``impurity.expected_gini`` with ``y``.

    Args:
        leaves: Sequence of leaf nodes; each must expose an ``_ID`` attribute.
        p_arr: Container indexed by leaf ``_ID`` giving that leaf's column.
        y: Targets; ``y.shape[0]`` fixes the number of rows.

    Returns:
        The value produced by ``impurity.expected_gini``.
    """
    n_rows = y.shape[0]
    subset_assign_probs = np.zeros((n_rows, len(leaves)))
    for column, leaf in enumerate(leaves):
        subset_assign_probs[:, column] = p_arr[leaf._ID]
    return impurity.expected_gini(subset_assign_probs, y)