def build_tree(self, training_data, limit=None):
        """
        Recursively construct a decision tree for the given data.

        Parameters:
        training_data - Pandas DataFrame; last column is taken as the class labels

        Keyword Args:
        limit - Max depth limit of tree process; None by default
        """
        labels = training_data.iloc[:, -1]

        # Leaf case 1: the data is pure -- every row carries the same label.
        if len(set(labels)) == 1:
            leaf = AnyNode()
            leaf.label = training_data.iloc[0, -1]
            return leaf

        # Leaf case 2: only the label column remains, so there is nothing
        # left to split on; fall back to the most common label.
        if len(training_data.columns) == 1:
            leaf = AnyNode()
            leaf.label = max(set(labels), key=list(labels).count)
            return leaf

        # Interior node: split on the feature with the best information gain.
        root = AnyNode()
        root.attribute = max(
            training_data.columns[0:-1],
            default=training_data.columns[0],
            key=lambda feature: info_gain(training_data, feature, self.bins))

        # One branch per known value of the chosen feature.
        for value in self.values[root.attribute]:
            # Subset on this value, then drop the consumed feature column.
            branch_data = subset_by_value(training_data, root.attribute,
                                          value)
            branch_data = branch_data.drop(root.attribute, axis=1)

            depth_exhausted = limit is not None and limit < 1
            if branch_data.empty or depth_exhausted:
                # No rows for this value (or depth budget spent): emit a
                # leaf labelled with the parent's most common class label.
                child = AnyNode()
                child.label = max(set(labels), key=list(labels).count)
            else:
                # Recurse, decrementing the remaining depth budget.
                child = self.build_tree(branch_data,
                                        None if limit is None else limit - 1)
            child.value = value

            # Attach the child while keeping previously attached children.
            root.children = list(root.children) + [child]

        return root
# Example #2
def prune_tree(parent: AnyNode, player_id: int, eps: float = 1e-8) -> None:
    """
    Given a game tree, select the optimal choice(s) for a certain player. Note that the eps arg is
    used to get rid of the numpy eps error, which will make the code more robust. For example,
    instead of 0.325, cal_win_prob([0, 0.65, 0.2])[2] is 0.32500000000000007, which would result in
    a bug in pruning the tree.
    :param parent: The root of the (sub)-tree that is going to be pruned; each of its children
                   must carry a ``win_prob`` vector indexed by player id.
    :param player_id: The id of the player
    :param eps: The criterion of "optimal enough".
    :return: Since all manipulations will be in-place, return nothing.
    """
    # Must be an ndarray: a plain Python list compared against a float
    # (`list >= scalar`) raises TypeError before np.argwhere is reached.
    player_win_prob = np.asarray(
        [child.win_prob[player_id] for child in parent.children]
    )
    # Indices of every child within relative tolerance eps of the best one.
    max_ind = np.argwhere(player_win_prob >= player_win_prob.max() * (1 - eps)).ravel()
    # If there is more than one optimum, use the expectation over them.
    expected_parent_win_prob = np.vstack(
        tuple(parent.children[i].win_prob for i in max_ind)
    ).mean(axis=0)
    parent.win_prob = expected_parent_win_prob
    # Keep only the optimal children.
    parent.children = tuple(parent.children[i] for i in max_ind)
    return None
def affinity_tree(g):
    '''
    Creates a contraction tree as seen in Aydin, K.; Bateni, M.H.; Mirrokni, V.: Distributed Balanced Partitioning via Linear Embedding.

    '''
    if config.TIME_STAMPS >= config.TimeStamps.ALL:
        layer = 0
    set_weights_graph(g)

    # Bottom layer of the tree: one leaf per node of the input graph.
    prev_layer = [AnyNode(node_id=node) for node in range(g.numberOfNodes())]

    # officially supported python construct for 'do ... while'
    while True:
        if config.TIME_STAMPS >= config.TimeStamps.ALL:
            print("------ Layer {:d} ------".format(layer))
            layer += 1

        # Contract each node with its closest neighbor.
        contraction = find_closest_neighbor_edges(g)
        mapping = contraction.get()
        g = contract_to_nodes(g, contraction, mapping)

        # Relies on the dict iterating in the same order as in contract_to_nodes
        curr_layer = []
        for members in mapping.values():
            parent = AnyNode()
            parent.children = [prev_layer[idx] for idx in members]
            curr_layer.append(parent)

        prev_layer = curr_layer

        # A single contracted cluster remains: its node is the root.
        if len(mapping) == 1:
            break

    return prev_layer[0]