def build_tree(self, training_data, limit=None):
    """
    Recursive function to build a decision tree.

    Parameters:
        training_data - Pandas DataFrame; the last column is taken as the class labels

    Keyword Args:
        limit - Maximum depth of the tree; None (unlimited) by default
    """
    node = AnyNode()

    # Data is pure; create a leaf with the class label
    if len(set(training_data.iloc[:, -1])) == 1:
        node.label = training_data.iloc[0, -1]
        return node

    # No more features to split on; use the most common label as the class label
    if len(training_data.columns) == 1:
        node.label = max(set(training_data.iloc[:, -1]),
                         key=list(training_data.iloc[:, -1]).count)
        return node

    # Default case; begin tree splitting.
    # Determine the feature that gives the best information gain
    split_feature = max(
        training_data.columns[0:-1],
        default=training_data.columns[0],
        key=lambda x: info_gain(training_data, x, self.bins))
    node.attribute = split_feature

    # Look up the possible values of the splitting feature and
    # create leaves/subtrees
    values = self.values[split_feature]
    for value in values:
        # Subset the data on this value, then drop the split feature
        training_data_v = subset_by_value(training_data, split_feature, value)
        training_data_v = training_data_v.drop(split_feature, axis=1)

        if training_data_v.empty or (limit is not None and limit < 1):
            # Subset is empty or the depth limit is reached; create a child
            # leaf labelled with the most common class label
            child = AnyNode()
            child.label = max(set(training_data.iloc[:, -1]),
                              key=list(training_data.iloc[:, -1]).count)
            child.value = value
        else:
            # Subset is not empty; create a child subtree recursively
            new_limit = None if limit is None else limit - 1
            child = self.build_tree(training_data_v, new_limit)
            child.value = value

        # Add the new node as a child of the current node;
        # its .value attribute maps the feature value to this child
        node.children = list(node.children) + [child]
    return node
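
# A minimal usage sketch (an assumption, not part of the original class): walk
# a tree produced by build_tree to classify one row. Internal nodes carry the
# split feature in .attribute, leaves carry .label, and each child stores the
# feature value it was split on in .value, mirroring the bookkeeping above.
def classify(node, row):
    # Descend until we reach a leaf (only leaves are given a .label attribute)
    while not hasattr(node, "label"):
        feature_value = row[node.attribute]
        # Follow the child whose stored split value matches this row;
        # raises StopIteration if the value was never seen during training
        node = next(c for c in node.children if c.value == feature_value)
    return node.label

# Example: classify(root, {"outlook": "sunny", "windy": True})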
import numpy as np
from anytree import AnyNode


def prune_tree(parent: AnyNode, player_id: int, eps: float = 1e-8) -> None:
    """
    Given a game tree, select the optimal choice(s) for a certain player.

    Note that the eps arg is used to absorb numpy floating-point error, which
    makes the code more robust. For example, instead of 0.325,
    cal_win_prob([0, 0.65, 0.2])[2] is 0.32500000000000007, which would
    result in a bug when pruning the tree.

    :param parent: The root of the (sub)-tree that is going to be pruned
    :param player_id: The id of the player
    :param eps: The criterion of "optimal enough".
    :return: Since all manipulations are in-place, return nothing.
    """
    player_win_prob = np.asarray(
        [leaf.win_prob[player_id] for leaf in parent.children])
    # Indices of all children whose win probability is within eps of the best
    max_ind = np.argwhere(
        player_win_prob >= player_win_prob.max() * (1 - eps)).ravel()

    # If there is more than one optimum, use the expectation over them
    expected_parent_win_prob = np.vstack(
        tuple(parent.children[i].win_prob for i in max_ind)
    ).mean(axis=0)

    parent.win_prob = expected_parent_win_prob
    parent.children = tuple(parent.children[i] for i in max_ind)
    return None
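
# A minimal usage sketch (an assumption, not part of the original module):
# prune a one-ply, two-player game tree for player 0. Each child carries a
# win_prob vector indexed by player id, as prune_tree expects.
def _prune_tree_demo():
    root = AnyNode()
    best = AnyNode(parent=root, win_prob=np.array([0.6, 0.4]))
    AnyNode(parent=root, win_prob=np.array([0.3, 0.7]))

    prune_tree(root, player_id=0)

    # Only the optimal move survives, and the parent inherits its win_prob
    assert len(root.children) == 1 and root.children[0] is best
    assert np.allclose(root.win_prob, [0.6, 0.4])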
from anytree import AnyNode


def affinity_tree(g):
    '''
    Creates a contraction tree as seen in Aydin, K.; Bateni, M.H.; Mirrokni, V.:
    Distributed Balanced Partitioning via Linear Embedding.
    '''
    if config.TIME_STAMPS >= config.TimeStamps.ALL:
        i = 0
    set_weights_graph(g)

    # One leaf per node of the original graph
    last_iteration = list()
    for node_id in range(g.numberOfNodes()):
        last_iteration.append(AnyNode(node_id=node_id))

    # Idiomatic Python construct for 'do ... while'
    while True:
        if config.TIME_STAMPS >= config.TimeStamps.ALL:
            print("------ Layer {:d} ------".format(i))
            i += 1

        # Contract each node with its closest neighbor
        curr_contraction = find_closest_neighbor_edges(g)
        dict_contraction = curr_contraction.get()
        g = contract_to_nodes(g, curr_contraction, dict_contraction)

        # Build the next tree layer: one parent per contracted node group.
        # Relies on the dict iterating in the same order as in contract_to_nodes
        curr_iteration = list()
        for v in dict_contraction.values():
            n = AnyNode()
            n.children = [last_iteration[child] for child in v]
            curr_iteration.append(n)
        last_iteration = curr_iteration

        if len(dict_contraction) == 1:
            break
    return last_iteration[0]
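
# A minimal usage sketch (an assumption, not part of the original module):
# the leaves under any internal node of the contraction tree form one cluster
# of original graph node ids, which is the kind of grouping the linear
# embedding in the referenced paper is built from.
def leaf_node_ids(subtree):
    # anytree exposes .leaves on every node; the leaves were created in the
    # first layer above and carry the original node ids in .node_id
    return [leaf.node_id for leaf in subtree.leaves]

# Example: leaf_node_ids(affinity_tree(g)) returns every node id of g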