Example #1
0
def _majority_leaf(num_positive, num_negative):
    """
    Build a leaf node labelled with the more frequent class.

    :param num_positive: count of positive labels at the current node
    :param num_negative: count of negative labels at the current node
    :return: Node labelled 'e' when positives outnumber negatives, '-' when
        negatives outnumber positives, and a random pick of the two on a tie
    """
    if num_positive > num_negative:
        return Node(label='e')
    if num_positive < num_negative:
        return Node(label='-')
    return Node(label=random.choice(['e', '-']))


def ID3(Attributes, X, Y):
    """
    Implementation of the ID3 algorithm.

    Recursively builds a binary decision tree: at every level the attribute
    with the highest value reported by ``Gain`` becomes the decision node and
    the data is divided into the positive and negative splits that ``Gain``
    returned for it.

    :param Attributes: sequence of attributes still available for testing;
        it is sliced and concatenated, so it must support both (e.g. a list)
    :param X: set of training instances
    :param Y: set of training labels (must support ``len``)
    :return: root Node of the decision tree built from the training data;
        leaves carry a ``label`` of 'e' (positive) or '-' (negative), inner
        nodes carry an ``attribute`` plus ``Positive_Branch`` and
        ``Negative_Branch`` children
    """
    # Handle base cases
    num_positive, num_negative = Calculate_Counts(Y)
    # All labels are positive
    if num_positive == len(Y):
        return Node(label='e')
    # All labels are negative
    if num_negative == len(Y):
        return Node(label='-')
    # No attributes left to test: fall back to the most common label
    if len(Attributes) == 0:
        return _majority_leaf(num_positive, num_negative)

    # Determine the best attribute. Only the splits of the attributes that
    # are currently tied for the highest gain are kept; everything else can
    # never be selected, so there is no reason to hold on to it.
    best_gain = None
    candidates = []  # (index, positive_split, negative_split) of tied winners
    for index, attribute in enumerate(Attributes):
        gain, positive_split, negative_split = Gain(attribute, X, Y)
        candidate = (index, positive_split, negative_split)
        if best_gain is None or gain > best_gain:
            best_gain = gain
            candidates = [candidate]
        elif gain == best_gain:
            candidates.append(candidate)

    # Break ties between equally good attributes at random
    best_index, best_positive_split, best_negative_split = random.choice(candidates)

    # Set the attribute of the decision node to the one with the max gain
    current_node = Node(attribute=Attributes[best_index])

    # Remove the chosen attribute for the recursive calls (Attributes - {A});
    # a new sequence is built so the caller's list is not mutated
    remaining = Attributes[:best_index] + Attributes[best_index + 1:]

    # Each split is indexed as (instances, labels). An empty split gets a
    # leaf carrying the majority label of the examples at this node.
    # Build positive child node
    if len(best_positive_split[0]) > 0:
        current_node.Positive_Branch = ID3(
            remaining, best_positive_split[0], best_positive_split[1])
    else:
        current_node.Positive_Branch = _majority_leaf(num_positive, num_negative)

    # Build negative child node
    if len(best_negative_split[0]) > 0:
        current_node.Negative_Branch = ID3(
            remaining, best_negative_split[0], best_negative_split[1])
    else:
        current_node.Negative_Branch = _majority_leaf(num_positive, num_negative)

    return current_node