def ID3(Attributes, X, Y):
    """
    Build a binary decision tree with the ID3 algorithm.

    The tree is grown recursively: the attribute with the highest
    information gain (as reported by ``Gain``) becomes the decision node,
    the data is partitioned into a positive and a negative split, and each
    split is expanded with the remaining attributes.

    :param Attributes: List of attributes still available for testing
    :param X: set of training instances
    :param Y: set of training labels
    :return: root Node of the decision tree built from the training data
    """
    num_positive, num_negative = Calculate_Counts(Y)

    # Base case: every label is positive.
    if num_positive == len(Y):
        return Node(label='e')
    # Base case: every label is negative.
    if num_negative == len(Y):
        return Node(label='-')
    # Base case: no attributes left to test, so fall back to the most
    # common label amongst the remaining examples.
    if len(Attributes) == 0:
        return _majority_leaf(num_positive, num_negative)

    # Evaluate every attribute, remembering its splits and tracking the
    # indices of all attributes that share the maximum gain.
    max_gain = None
    max_indices = []
    positive_splits = []
    negative_splits = []
    for i, attribute in enumerate(Attributes):
        gain, positive_split, negative_split = Gain(attribute, X, Y)
        positive_splits.append(positive_split)
        negative_splits.append(negative_split)

        if max_gain is None or gain > max_gain:
            max_gain = gain
            max_indices = [i]
        elif gain == max_gain:
            max_indices.append(i)

    # Attributes tied for the best gain are chosen between at random.
    max_index = random.choice(max_indices)
    max_attribute = Attributes[max_index]
    max_positive_split = positive_splits[max_index]
    max_negative_split = negative_splits[max_index]

    # Remove the chosen attribute (Attributes - {A}). Slicing builds a new
    # sequence, so the caller's list is left untouched.
    Attributes = Attributes[:max_index] + Attributes[max_index + 1:]

    current_node = Node(attribute=max_attribute)

    # Each split is indexed as (instances, labels). An empty split cannot be
    # expanded further, so it becomes a leaf carrying this node's majority
    # label. The explicit len() checks are kept (rather than truthiness) so
    # the test does not depend on the container type of the split.
    if len(max_positive_split[0]) > 0:
        current_node.Positive_Branch = ID3(
            Attributes, max_positive_split[0], max_positive_split[1])
    else:
        current_node.Positive_Branch = _majority_leaf(
            num_positive, num_negative)

    if len(max_negative_split[0]) > 0:
        current_node.Negative_Branch = ID3(
            Attributes, max_negative_split[0], max_negative_split[1])
    else:
        current_node.Negative_Branch = _majority_leaf(
            num_positive, num_negative)

    return current_node


def _majority_leaf(num_positive, num_negative):
    """Return a leaf Node with the majority label, breaking ties at random."""
    if num_positive > num_negative:
        return Node(label='e')
    if num_positive < num_negative:
        return Node(label='-')
    return Node(label=random.choice(['e', '-']))