def __create_node__(self, parent_node, parent_attribute_val, xs, ys, attributes, level):
    """Recursively build the subtree for one partition of the training data.

    Args:
        parent_node: Node the new subtree is attached to. Its
            ``attribute_name`` is forwarded to the attribute-selection and
            partitioning helpers.
        parent_attribute_val: Value of the parent's attribute that selects
            this partition; stored on the returned ``Leaf``/``Node``.
        xs: Samples belonging to this partition (presumably aligned
            index-for-index with ``ys`` -- confirm against the caller).
        ys: Class labels of this partition. Must be indexable and contain
            hashable values.
        attributes: Sized collection of attribute names still available
            for splitting.
        level: Depth of the node being created, compared against
            ``self.max_depth``.

    Returns:
        A ``Leaf`` when the depth limit is reached, the labels are pure,
        no attributes remain, or the chosen attribute yields a single
        group; otherwise a ``Node`` with one child per attribute value.
    """
    # Depth limit reached: stop splitting and fall back to the majority class.
    if level >= self.max_depth:
        return Leaf(parent_attribute_val, self.__get_most_frequent_value__(ys))

    # Every sample carries the same label: nothing left to decide.
    if len(set(ys)) == 1:
        return Leaf(parent_attribute_val, ys[0])

    # All attributes were consumed by ancestors: there is nothing to split
    # on, so take the majority vote instead of asking for the best of zero
    # candidates. len() is used rather than truthiness so array-like
    # attribute collections are handled as well.
    if len(attributes) == 0:
        return Leaf(parent_attribute_val, self.__get_most_frequent_value__(ys))

    best_attribute_name = self.__find_best_attribute__(
        attributes, parent_node.attribute_name, parent_attribute_val, xs, ys)
    new_node = Node(best_attribute_name, parent_attribute_val, parent_node,
                    self.fn_attribute_val_extractor)

    # Pivot by value to segment the dataset for the children.
    xs_by_val, ys_by_val = self.__partition_by_attribute__(
        best_attribute_name, parent_node.attribute_name, parent_attribute_val, xs, ys)

    # If only one grouping, no further attributes are useful: take the
    # majority vote for the class.
    if len(ys_by_val) == 1:
        return Leaf(parent_attribute_val, self.__get_most_frequent_value__(ys))

    # The matched attribute must not be reused further down this branch.
    remaining_attributes = [a for a in attributes if a != best_attribute_name]

    for attr_val, xs4val in xs_by_val.items():
        child_node = self.__create_node__(
            new_node, attr_val, xs4val, ys_by_val[attr_val],
            remaining_attributes, level + 1)
        new_node.add_child(child_node)

    return new_node