def make_tree(train_array):
    """Build the tree incrementally from ``train_array`` and print it.

    Steps, as performed by this function:

    1. Move instances from the front of ``train_array`` into the global
       ``current_instances`` until ``calculate_global_entropy`` of that
       list is no longer ``0``.
    2. Call ``choose_root()`` to create the global ``tree_root``.
    3. Feed every remaining instance to ``tree_root.add_instance``. When
       that call reports that a leaf must change, replace the leaf at
       ``new_node_place`` with a new feature node (chosen by
       ``get_best_feature``) and give it one leaf child per attribute.
    4. Run the pull-up check and print the tree via ``RenderTree``.

    Args:
        train_array: Sequence of training instances. It is mutated: the
            seeding step removes instances from its front with ``pop(0)``.

    Raises:
        IndexError: If ``train_array`` runs out of instances before the
            global entropy becomes non-zero (``pop(0)`` on an empty list).
    """
    global current_instances
    global blocked_features
    global tree_nodes

    # Seed the instance pool until the entropy over it is non-zero.
    current_instances = []
    while calculate_global_entropy(current_instances) == 0:
        current_instances.append(train_array.pop(0))
    choose_root()

    for ins in train_array:
        (should_change, new_node_place, child_node,
         parent_node) = tree_root.add_instance(ins)
        current_instances.append(ins)
        if not should_change:
            continue

        # Instances matching the path from the affected leaf up to the root.
        related_instances = query(child_node.get_path_to_root())
        new_feature_name = get_best_feature(parent_node, related_instances)
        if new_feature_name is None:
            # No feature available to split on; keep the leaf as it is.
            continue

        # Unlike create_tree, this function does not update
        # blocked_features (those updates were disabled in the original).
        (new_feature_total_labels,
         new_feature_attr_labels) = get_feature_labels(new_feature_name,
                                                       related_instances)
        # Positional arguments presumably follow the keyword order used in
        # choose_root (parent, feature_name, children_labels,
        # children_nodes, is_leaf, is_feature, labels) -- TODO confirm.
        new_node = TreeNode(parent_node, new_feature_name,
                            new_feature_attr_labels, None, True, True,
                            new_feature_total_labels)

        # Unlink the replaced leaf's visual node, then swap in the new node.
        parent_node.children_nodes[new_node_place].visual_node.parent = None
        parent_node.children_nodes[new_node_place] = new_node

        # list.remove() takes the element, not its index. The original
        # passed the index, which always raised and was silently ignored,
        # so the replaced leaf was never dropped from tree_nodes.
        try:
            tree_nodes.remove(child_node)
        except ValueError:
            # Best effort: the leaf was not registered in tree_nodes.
            pass

        tree_nodes.append(new_node)
        new_node_children_nodes = {}
        # Snapshot the keys before creating children under new_node.
        for attr_name in list(new_node.children_labels):
            temp_node = TreeNode(new_node, None, None, None, True, False,
                                 new_node.children_labels[attr_name])
            new_node_children_nodes[str(attr_name)] = temp_node
            tree_nodes.append(temp_node)
        new_node.children_nodes = new_node_children_nodes

    # NOTE(review): the original source had lost its indentation, so it is
    # not certain whether the pull-up check and the rendering below ran
    # once after the loop (as here) or once per instance -- confirm.
    # NOTE(review): this file defines `check_for_pullup`; the call below is
    # spelled `check_for_pull_up` -- confirm that name exists elsewhere.
    check_for_pull_up()
    for pre, _fill, node in RenderTree(tree_root.visual_node):
        print("%s%s" % (pre, node.name))
def choose_root():
    """Create the root feature node and its leaf children.

    Scores every feature of ``current_instances[0]`` that is not in
    ``blocked_features`` as (global entropy - feature entropy), picks the
    highest-scoring one as the root, and builds one leaf child per value
    returned by ``get_feature_attrs`` for that feature.

    Module state written: ``tree_nodes`` is reset and refilled,
    ``tree_root`` is set to the new root, the root feature is appended to
    ``blocked_features``, the root's visual node is appended to
    ``visual_nodes``, and ``current_instances`` is reset to an empty list.

    Raises:
        ValueError: If every feature is blocked (``max`` of an empty dict).
    """
    global current_labels, current_instances, tree_nodes
    global visual_nodes, tree_root, blocked_features

    tree_nodes = []

    # Score each candidate feature that has not been blocked.
    scores = {}
    for candidate in list(current_instances[0].feature_dict.keys()):
        if candidate in blocked_features:
            continue
        scores[candidate] = (calculate_global_entropy(current_instances)
                             - get_feature_entropy(candidate))
    best_feature = max(scores, key=scores.get)

    root_node = TreeNode(parent=None,
                         feature_name=str(best_feature),
                         children_labels=None,
                         children_nodes=None,
                         is_leaf=True,
                         is_feature=True,
                         labels=get_global_labels())
    root_visual = Node(root_node.feature_name)

    blocked_features.append(root_node.feature_name)
    visual_nodes.append(root_visual)
    tree_nodes.append(root_node)
    tree_root = root_node

    labels_by_attr = {}
    nodes_by_attr = {}
    for attr in get_feature_attrs(root_node.feature_name):
        leaf = TreeNode(parent=root_node,
                        feature_name=None,
                        children_labels=None,
                        children_nodes=None,
                        is_leaf=True,
                        is_feature=False,
                        labels=get_feature_attrs(root_node.feature_name)[attr])
        # Created only for its effect: it is constructed with the root's
        # visual node as parent; the object itself is not used afterwards.
        caption = "".join((str(attr), "[", str(leaf.labels[0]), ",",
                           str(leaf.labels[1]), "]"))
        Node(caption, parent=root_visual)

        attr_key = str(attr)
        labels_by_attr[attr_key] = leaf.labels
        nodes_by_attr[attr_key] = leaf
        tree_nodes.append(leaf)

    root_node.children_labels = labels_by_attr
    root_node.children_nodes = nodes_by_attr

    # The instance pool is cleared once the root has been chosen.
    current_instances = []
def create_tree(ins_array):
    """Insert every instance of ``ins_array`` into the current tree.

    Each instance is passed to ``tree_root.add_instance``. When that call
    reports that a leaf must change, the features on the leaf's path to
    the root are merged into the global ``blocked_features``, a new
    feature is picked with ``get_best_feature``, and the leaf at
    ``new_node_place`` is replaced by a feature node with one leaf child
    per attribute.

    Args:
        ins_array: Iterable of instances to add; it is only iterated.

    Returns:
        None.
    """
    global tree_root
    global blocked_features

    for ins in ins_array:
        (should_change, new_node_place, child_node,
         parent_node) = tree_root.add_instance(ins)
        if not should_change:
            continue

        # Block every feature already used on the path to the root. Going
        # through a set de-duplicates, so the resulting order is arbitrary.
        blocked_features = list(set().union(
            blocked_features, list(child_node.get_path_to_root().keys())))

        related_instances = query(child_node.get_path_to_root())
        new_feature_name = get_best_feature(parent_node, related_instances)
        if new_feature_name is None:
            # No feature available to split on; keep the leaf as it is.
            continue

        blocked_features.append(new_feature_name)
        (new_feature_total_labels,
         new_feature_attr_labels) = get_feature_labels(new_feature_name,
                                                       related_instances)
        # Positional arguments presumably follow the keyword order used in
        # choose_root (parent, feature_name, children_labels,
        # children_nodes, is_leaf, is_feature, labels) -- TODO confirm.
        new_node = TreeNode(parent_node, new_feature_name,
                            new_feature_attr_labels, None, True, True,
                            new_feature_total_labels)

        # Unlink the replaced leaf's visual node, then swap in the new node.
        parent_node.children_nodes[new_node_place].visual_node.parent = None
        parent_node.children_nodes[new_node_place] = new_node

        # list.remove() takes the element, not its index. The original
        # passed the index, which always raised and was silently ignored,
        # so the replaced leaf was never dropped from tree_nodes.
        try:
            tree_nodes.remove(child_node)
        except ValueError:
            # Best effort: the leaf was not registered in tree_nodes.
            pass

        tree_nodes.append(new_node)
        new_node_children_nodes = {}
        # Snapshot the keys before creating children under new_node.
        for attr_name in list(new_node.children_labels):
            temp_node = TreeNode(new_node, None, None, None, True, False,
                                 new_node.children_labels[attr_name])
            new_node_children_nodes[str(attr_name)] = temp_node
            tree_nodes.append(temp_node)
        new_node.children_nodes = new_node_children_nodes
def check_for_pullup():
    """Promote a child feature node to root when its entropy is lower.

    Walks the children of the global ``tree_root``. For each child that is
    a feature node, prints its feature name; if ``child.entropy()`` is
    lower than ``child.parent.entropy()``, a new root is built from that
    child's feature, fresh leaf children are created for it, and the
    instances previously seen by the old root are re-inserted through
    ``create_tree``. Debug output is printed along the way.

    NOTE(review): the original source had lost its indentation; the
    nesting below is the reading under which every statement has the
    names it uses in scope -- confirm against the intended design.
    """
    global tree_nodes
    global tree_root
    # NOTE(review): the key view is taken from the root that is current
    # when the loop starts, but `tree_root` may be reassigned inside the
    # loop; later iterations then index the NEW root's children_nodes with
    # keys of the OLD root. Confirm whether the loop should stop after a
    # pull-up.
    for key in tree_root.children_nodes.keys():
        n = tree_root.children_nodes[key]
        # Captured before any reassignment of tree_root in this iteration,
        # so these are the instances of the root as it is at this point.
        now_instances = tree_root.seen_instances
        if ((n.is_feature)):
            print(n.feature_name)
            if (n.entropy() < n.parent.entropy()):
                # New root carrying the child's feature and labels.
                # It is not appended to tree_nodes here; only its children are.
                node = TreeNode(parent=None,
                                feature_name=str(n.feature_name),
                                children_labels=None,
                                children_nodes=None,
                                is_leaf=True,
                                is_feature=True,
                                labels=n.labels)
                tree_root = node
                node_children_labels = {}
                node_children_nodes = {}
                for attr in n.children_nodes.keys():
                    print(attr)
                    # Leaf child per attribute of the promoted node; its
                    # feature_name is set to the old parent's feature name.
                    temp_node = TreeNode(parent=node,
                                         feature_name=n.parent.feature_name,
                                         children_labels=None,
                                         children_nodes=None,
                                         is_leaf=True,
                                         is_feature=False,
                                         labels=get_feature_attrs(
                                             node.feature_name)[attr])
                    node_children_labels[str(attr)] = temp_node.labels
                    node_children_nodes[str(attr)] = temp_node
                    tree_nodes.append(temp_node)
                node.children_labels = node_children_labels
                node.children_nodes = node_children_nodes
                # Replay the previously seen instances into the new tree.
                create_tree(now_instances)
    print("------------")
    return