예제 #1
0
def DTL(examples, attributes, parent_examples, orig_examples, att_dict,
        classes):
    """Recursively build a decision tree from ``examples``.

    Base cases (each returns a leaf value rather than a node):
    - no examples are left: the plurality value of ``parent_examples``
    - all remaining examples have the same class: that class
    - every entry of ``attributes`` is ``None`` (nothing left to split on):
      the plurality value of ``examples``

    Otherwise ``get_next_attribute()`` decides which attribute to split on,
    a ``TreeNode`` is created for it, and one subtree is built for every
    value that attribute takes in ``orig_examples``.

    Args:
        examples: Rows still under consideration at this level. The value of
            the attribute at position ``k`` in ``attributes`` is read from
            column ``k + 1`` of each row; column 0 of the matching rows is
            handed to ``node.add_examples``.
        attributes: Attribute list, position-aligned with the example
            columns. Attributes already used higher up in the tree are
            ``None``. This list is not modified.
        parent_examples: The examples of the calling level, used when
            ``examples`` is empty.
        orig_examples: The full example set. It is used to enumerate every
            possible value of the chosen attribute, even values that no
            longer occur in ``examples`` at this level of the tree.
        att_dict: Passed through to ``TreeNode``.
        classes: Passed through to ``get_next_attribute`` and ``TreeNode``.

    Returns:
        A ``TreeNode`` for the chosen attribute, or, in a base case, whatever
        ``plurality_value`` / ``get_class`` returns.
    """
    if len(examples) == 0:
        return plurality_value(parent_examples)
    if check_if_all_same(examples):
        return get_class(examples[0])
    if all(x is None for x in attributes):
        return plurality_value(examples)

    next_attribute = get_next_attribute(attributes, examples, classes)
    node = TreeNode(next_attribute, att_dict, classes, list(), list())

    next_attribute_index = attributes.index(next_attribute)
    # Attribute k lives in example column k + 1.
    value_column = next_attribute_index + 1
    v_values = set(column(orig_examples, value_column))

    # Blank out the chosen attribute in a copy. Doing it in place would leak
    # into sibling branches (which share this list through the recursion) and
    # into the caller's list, so an attribute used in one subtree could never
    # be selected in any subtree built after it.
    remaining_attributes = list(attributes)
    remaining_attributes[next_attribute_index] = None

    for i in v_values:
        child_examples = [x for x in examples if x[value_column] == i]
        # Children and their examples are keyed by the integer form of the
        # attribute value.
        node.add_examples(column(child_examples, 0), int(i))
        subtree = DTL(child_examples, remaining_attributes, examples,
                      orig_examples, att_dict, classes)
        node.add_child(subtree, int(i))
    return node
예제 #2
0
파일: tree.py 프로젝트: zclore/helloworld
    def _build_tree(self, lis, temp_id=1, parent=None):
        if len(lis) == 0:
            return None

        if len(lis) == 0:
            return None
        parentnode = parent
        if temp_id == 1 and parent == None:
            val = lis[0]
            parentnode = TreeNode(val, self.get_pos(val),
                                  self.get_position(val), temp_id, parent)
            temp_id += 1

        for i in range(1, len(lis)):
            if type(lis[i]) == str:
                parentnode.add_child(lis[i], self.get_pos(lis[i]),
                                     self.get_position(lis[i]), temp_id,
                                     parentnode)
                temp_id += 1
            else:
                subtreenode = parentnode.add_child(
                    lis[i][0], self.get_pos(lis[i][0]),
                    self.get_position(lis[i][0]), temp_id, parentnode)
                temp_id += 1
                useless, temp_id = self._build_tree(lis[i], temp_id,
                                                    subtreenode)

        return parentnode, temp_id
예제 #3
0
파일: binary.py 프로젝트: aagarwal92/TLDR
# Copy every item from file_data2 into nodes2, counting them on the way.
for item in file_data2:
    fileCount += 1
    nodes2.append(item)

# A parenthesised single-argument print produces the same output with the
# Python 2 print statement as with the print function.
print("number of reddits in the new dictionary %d " % fileCount)


# Count the nodes with fewer than 1000 subscribers and report any node that
# has no subscriber value at all.
nodecount = 0
for node in nodes2:
    if node.subscribers is None:
        # Python 2 orders None below every integer, so the former bare
        # `< 1000` comparison counted these nodes as well. Keep counting
        # them, but do so explicitly instead of relying on that ordering.
        nodecount += 1
        print("There is a subreddit with no subscribers")
    elif node.subscribers < 1000:
        nodecount += 1
print("total nodes with less than 1000 subscribers % d " % nodecount)

# Work on a copy of the node list from here on.
reddit500 = list(nodes2)
count = len(reddit500)
print(count)

# Attach every node without a parent directly to `reddit`.
for node in reddit500:
    if node.parent is None:
        reddit.add_child(node)

# Dump the data into a JavaScript/JSON file used to create the viz.
# The payload is built before the file is opened so that a serialisation
# error does not leave a truncated output file behind, and the `with` block
# closes the file even if the write fails.
a = "var viz_data = " + json.dumps(reddit.return_json()) + ";"
with open('REDDITS4.json', 'wb') as json_output:
    json_output.write(a)