Esempio n. 1
0
    def tdidt(self, current_instances, available_attributes):
        """Recursively build a decision tree (top-down induction).

        The result is a nested list:
        ``["Attribute", name, ["Value", v, node], ...]`` where each ``node``
        is either another attribute subtree or
        ``["Leaf", label, count, total]``.

        Args:
            current_instances: rows to split at this node; the last element
                of a row is read as its class label.
            available_attributes: attributes still eligible for splitting.
                NOTE: the chosen split attribute is removed from this
                object in place, so the caller's list is modified.

        Returns:
            The nested-list tree rooted at the selected split attribute.
        """
        # Choose the attribute for this node and retire it so it cannot be
        # reused further down the same branch (this mutates the argument).
        split_attribute = myutils.select_attribute(
            current_instances, available_attributes, self.header)
        available_attributes.remove(split_attribute)

        tree = ["Attribute", split_attribute]

        # One (pairwise disjoint) group of rows per value of the attribute.
        partitions = self.partition_instances(current_instances,
                                              split_attribute)

        for attribute_value, rows in partitions.items():
            if len(rows) == 0:
                # CASE 3: empty partition => majority-vote leaf computed
                # from the instances that reached this node.
                majority, count, total = myutils.compute_partition_stats(
                    current_instances)
                node = ["Leaf", majority, count, total]
            elif myutils.all_same_class(rows):
                # CASE 1: every row carries the same label => pure leaf.
                node = ["Leaf", rows[0][-1], len(rows),
                        len(current_instances)]
            elif len(available_attributes) == 0:
                # CASE 2: nothing left to split on (clash) => majority-vote
                # leaf computed from this partition.
                majority, count, total = myutils.compute_partition_stats(
                    rows)
                node = ["Leaf", majority, count, total]
            else:
                # No base case applies: recurse. When more than F attributes
                # remain, the child only gets a random subset of size F;
                # otherwise it gets its own copy of all of them.
                if len(available_attributes) > self.F:
                    child_attributes = random.sample(available_attributes,
                                                     self.F)
                else:
                    child_attributes = available_attributes.copy()
                node = self.tdidt(rows, child_attributes)

            tree.append(["Value", attribute_value, node])

        return tree
Esempio n. 2
0
    def tdidt(self, current_instances, available_attributes):
        """Recursively build a decision tree (top-down induction).

        The result is a nested list:
        ``["Attribute", name, ["Value", v, node], ...]`` where each ``node``
        is either another attribute subtree or whatever
        ``myutils.create_leaf_node`` returns.

        Args:
            current_instances: rows to split at this node.
            available_attributes: attributes still eligible for splitting.
                NOTE: the chosen split attribute is removed from this
                object in place, so the caller's list is modified.

        Returns:
            The nested-list tree, or ``None`` as soon as an empty partition
            is met. A recursive caller reacts to ``None`` by putting a
            leaf (``case=2``) in place of the abandoned subtree.
        """
        # Choose the attribute for this node and retire it so it cannot be
        # reused further down the same branch (this mutates the argument).
        split_attribute = myutils.select_attribute(
            current_instances, available_attributes,
            self.attribute_domains, self.header)
        available_attributes.remove(split_attribute)

        tree = ["Attribute", split_attribute]

        # One (pairwise disjoint) group of rows per attribute value.
        partitions = myutils.partition_instances(
            current_instances, split_attribute,
            self.attribute_domains, self.header)

        for attribute_value, rows in partitions.items():
            if len(rows) == 0:
                # CASE 3: empty partition => give up on this attribute node
                # and let the caller substitute a leaf for it.
                return None

            if myutils.all_same_class(rows):
                # CASE 1: every row carries the same label.
                node = myutils.create_leaf_node(rows, partitions, case=1)
            elif len(available_attributes) == 0:
                # CASE 2: nothing left to split on (clash).
                node = myutils.create_leaf_node(rows, partitions, case=2)
            else:
                # No base case applies: recurse on a private copy so that
                # sibling branches keep their own attribute lists.
                node = self.tdidt(rows, available_attributes.copy())
                if node is None:
                    # The child hit CASE 3; replace it with a leaf.
                    node = myutils.create_leaf_node(rows, partitions,
                                                    case=2)

            tree.append(["Value", attribute_value, node])

        return tree