def DTL(examples, attributes, parent_examples, orig_examples, att_dict, classes):
    """Recursively build a decision tree from ``examples``.

    Base cases:
    - no more examples available: return plurality_value(parent_examples)
    - all remaining examples have the same class: return that class
    - no more attributes available (every slot of ``attributes`` is None):
      return plurality_value(examples)

    The recursive step calls get_next_attribute() to decide which attribute
    to split on, creates a TreeNode for it and builds one subtree per value
    of that attribute.

    ``orig_examples`` is used to enumerate the attribute's values, so that
    every value gets a branch even if it is no longer present in
    ``examples`` at this level of the tree.

    The attribute stored at index ``k`` of ``attributes`` is read from
    position ``k + 1`` of each example.  Position 0 of each example is what
    gets handed to ``node.add_examples`` (presumably an example id or label
    column -- confirm against the data loader).

    Returns either a TreeNode or whatever plurality_value() / get_class()
    return for a leaf.
    """
    if len(examples) == 0:
        return plurality_value(parent_examples)
    if check_if_all_same(examples):
        return get_class(examples[0])
    if all(x is None for x in attributes):
        return plurality_value(examples)

    next_attribute = get_next_attribute(attributes, examples, classes)
    node = TreeNode(next_attribute, att_dict, classes, list(), list())

    next_attribute_index = attributes.index(next_attribute)
    example_column = next_attribute_index + 1
    v_values = set(column(orig_examples, example_column))

    # Blank out the chosen attribute in a *copy* of the list.  Using None
    # instead of removing the entry keeps the index -> example-column mapping
    # stable.  Copying matters: mutating the shared list in place would let
    # the attributes consumed in one child's subtree vanish for its sibling
    # branches (and for the caller) as well.
    remaining_attributes = list(attributes)
    remaining_attributes[next_attribute_index] = None

    for value in v_values:
        child_examples = [x for x in examples if x[example_column] == value]
        node.add_examples(column(child_examples, 0), int(value))
        subtree = DTL(child_examples, remaining_attributes, examples,
                      orig_examples, att_dict, classes)
        node.add_child(subtree, int(value))
    return node
def _build_tree(self, lis, temp_id=1, parent=None):
    """Turn a nested list description of a tree into linked nodes.

    ``lis[0]`` is the label of the current node; every following element is
    either a string (a leaf child) or a nested sequence whose own first
    element is the child's label and whose remaining elements are that
    child's children.

    Parameters:
        lis: nested sequence describing the (sub)tree.
        temp_id: id to assign to the next node that gets created; it is
            incremented once per created node.
        parent: node that the children in ``lis[1:]`` are attached to.
            When it is None and ``temp_id`` is 1, a root TreeNode is created
            from ``lis[0]`` first.

    Returns:
        None when ``lis`` is empty, otherwise a ``(node, next_id)`` tuple
        where ``node`` is the node the children were attached to and
        ``next_id`` is the first id that has not been used yet.
    """
    if len(lis) == 0:
        return None

    parentnode = parent
    if temp_id == 1 and parent is None:
        # Top-level call: the root node has to be created here, every other
        # node is created through its parent's add_child().
        val = lis[0]
        parentnode = TreeNode(val, self.get_pos(val), self.get_position(val),
                              temp_id, parent)
        temp_id += 1

    for item in lis[1:]:
        is_leaf = isinstance(item, str)
        label = item if is_leaf else item[0]
        child = parentnode.add_child(label, self.get_pos(label),
                                     self.get_position(label), temp_id,
                                     parentnode)
        temp_id += 1
        if not is_leaf:
            # Descend into the nested description; only the updated id
            # counter of the recursive call is needed here.
            _, temp_id = self._build_tree(item, temp_id, child)
    return parentnode, temp_id
# Copy every entry of file_data2 into nodes2, keeping a running total.
for item in file_data2:
    fileCount += 1
    nodes2.append(item)
print("number of reddits in the new dictionary %d " % fileCount)

# Count the nodes with fewer than 1000 subscribers and report nodes whose
# subscriber count is missing.
# NOTE(review): under Python 2 ``None < 1000`` is True, so nodes without a
# subscriber count are included in this total -- confirm that is intended.
nodecount = 0
for node in nodes2:
    if node.subscribers < 1000:
        nodecount += 1
    if node.subscribers is None:
        print("There is a subreddit with no subscribers")
print("total nodes with less than 1000 subscribers % d " % nodecount)

# reddit500 is currently an unfiltered copy of nodes2.
reddit500 = list(nodes2)
count = len(reddit500)
print(count)

# Nodes without a parent are attached directly to the root ``reddit`` node.
for node in reddit500:
    if node.parent is None:
        reddit.add_child(node)

# Dump the data into a file for the visualisation.  The content is a
# JavaScript assignment ("var viz_data = ...;") wrapping the JSON document.
# The payload is built before the file is opened so that a serialisation
# error does not leave a truncated file behind, and the ``with`` block closes
# the file even if the write fails.
a = "var viz_data = " + json.dumps(reddit.return_json()) + ";"
with open('REDDITS4.json', 'wb') as json_output:
    json_output.write(a)