Example #1
0
    def __create_node__(self, parent_node, parent_attribute_val, xs, ys,
                        attributes, level):
        """Recursively build the subtree reached via ``parent_attribute_val``.

        A ``Leaf`` is returned when any stopping rule fires:

        * ``level`` has reached ``self.max_depth`` (majority label is used);
        * every label in ``ys`` is identical (that label is used);
        * partitioning on the chosen attribute yields a single group
          (majority label is used).

        Otherwise a ``Node`` for the chosen attribute is returned, with one
        child per distinct attribute value, each built by a recursive call
        at ``level + 1`` with the chosen attribute removed from the
        candidate list.

        Args:
            parent_node: Node this subtree is attached under; its
                ``attribute_name`` is forwarded to the selection and
                partition helpers.
            parent_attribute_val: Value of the parent's attribute that leads
                to this subtree; stored on the returned ``Leaf``/``Node``.
            xs: Samples for this branch (presumably parallel to ``ys`` --
                confirm against the partition helper).
            ys: Class labels for this branch; must support indexing.
            attributes: Candidate attribute names still available for
                splitting.
            level: Depth of the node being created.

        Returns:
            A ``Leaf`` or a ``Node`` with its children attached.
        """

        def majority_leaf():
            # Shared fallback: label the branch with the most frequent class.
            winner = self.__get_most_frequent_value__(ys)
            return Leaf(parent_attribute_val, winner)

        # Depth cap reached: stop splitting.
        if level >= self.max_depth:
            return majority_leaf()

        # All labels agree: nothing left to separate.
        if len(set(ys)) == 1:
            return Leaf(parent_attribute_val, ys[0])

        split_attribute = self.__find_best_attribute__(
            attributes, parent_node.attribute_name, parent_attribute_val, xs,
            ys)
        subtree_root = Node(split_attribute, parent_attribute_val,
                            parent_node, self.fn_attribute_val_extractor)

        # Group the samples and labels by their value of the split attribute.
        grouped_xs, grouped_ys = self.__partition_by_attribute__(
            split_attribute, parent_node.attribute_name,
            parent_attribute_val, xs, ys)

        # A single group means the split separates nothing; fall back to
        # the majority class.
        if len(grouped_ys) == 1:
            return majority_leaf()

        # The chosen attribute is not offered to the descendants again.
        unused_attributes = [
            name for name in attributes if name != split_attribute
        ]

        next_level = level + 1
        for branch_val, branch_xs in grouped_xs.items():
            branch_ys = grouped_ys[branch_val]
            subtree_root.add_child(
                self.__create_node__(subtree_root, branch_val, branch_xs,
                                     branch_ys, unused_attributes,
                                     next_level))
        return subtree_root