def test_Insert_Node(self):
    bst = BST()
    tNode1 = Node(21)
    tNode2 = Node(20)
    treeInsert(bst, tNode1)
    treeInsert(bst, tNode2)
    self.assertEqual(2, len(bst))
    searched = treeSearch(bst.root, 20)
    self.assertIsNotNone(searched)

def _insert(self, value, currentNode):
    if value <= currentNode.value:
        if currentNode.hasLeftChild():
            self._insert(value, currentNode.left)
        else:
            currentNode.left = Node(value, parent=currentNode)
    else:
        if currentNode.hasRightChild():
            self._insert(value, currentNode.right)
        else:
            currentNode.right = Node(value, parent=currentNode)

def add_node(self, node, key):
    if node.key == key:
        return
    if node.key > key:
        if node.left is None:
            node.left = Node(key)
        else:
            self.add_node(node.left, key)
    else:
        if node.right is None:
            node.right = Node(key)
        else:
            self.add_node(node.right, key)

def test_Delete_Node(self):
    bst = BST()
    treeInsert(bst, Node(21))
    treeInsert(bst, Node(20))
    treeInsert(bst, Node(35))
    treeInsert(bst, Node(64))
    treeInsert(bst, Node(32))
    self.assertEqual(5, len(bst))
    searched = treeSearch(bst.root, 20)
    self.assertIsNotNone(searched)
    treeDelete(bst, searched)
    searchAgain = treeSearch(bst.root, 20)
    self.assertIsNone(searchAgain)

def randomForest(data, attributes, numAttributes, numDataPoints, numTrees, holdoutSet):
    trees = []
    uncategorizedPoints = 0
    numeric = isNumeric(data[0])
    # grow numTrees trees, each on a random sample of the data and attributes
    for _ in range(numTrees):
        randomData, randomAttributes = selectRandomData(data, attributes,
                                                        numAttributes, numDataPoints)
        currentTree = Node('Root', None)
        buildTree(randomData, randomAttributes, currentTree, THRESHOLD)
        trees.append(currentTree)
    bestClassifs = []
    for index, dataPoint in enumerate(holdoutSet):
        if numeric:
            classifs = [classifyPointNum(tree, dataPoint) for tree in trees]
        else:
            classifs = [classifyPointCat(tree, dataPoint) for tree in trees]
        classifs = list(filter(lambda x: x is not None, classifs))
        if len(classifs) == 0:
            uncategorizedPoints += 1
            continue
        # majority vote across the trees
        mostFreqClassif = max(classifs, key=classifs.count)
        bestClassifs.append((dataPoint['id'], mostFreqClassif))
    return bestClassifs, uncategorizedPoints

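# Hedged sketch (an assumption, not the original helper): selectRandomData is
# called above but not included in this collection. A plausible version, assuming
# it should return a bagging-style bootstrap sample of the rows plus a random
# subset of the candidate attributes, might look like this:
import random


def selectRandomData(data, attributes, numAttributes, numDataPoints):
    # sample rows with replacement, as in bagging
    randomData = [random.choice(data) for _ in range(numDataPoints)]
    # pick a random subset of attributes for this tree
    randomAttributes = random.sample(attributes, min(numAttributes, len(attributes)))
    return randomData, randomAttributes
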
class BST:
    def __init__(self, val):
        self.root = Node(val)

    def setRoot(self, val):
        self.root = Node(val)

    def getRoot(self):
        return self.root.get()

    def insert(self, val):
        self.root.insert(val)

    def search(self, val):
        return self.root.search(val)

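# Hedged sketch: the BST wrapper above delegates everything to Node.get(),
# Node.insert(val) and Node.search(val), which are not shown in this collection.
# A minimal Node satisfying that interface (an assumption, not the original
# class) could be:
class Node:
    def __init__(self, val):
        self.val = val
        self.left = None
        self.right = None

    def get(self):
        return self.val

    def insert(self, val):
        # smaller or equal values go left, larger values go right
        if val <= self.val:
            if self.left is None:
                self.left = Node(val)
            else:
                self.left.insert(val)
        else:
            if self.right is None:
                self.right = Node(val)
            else:
                self.right.insert(val)

    def search(self, val):
        # return the matching node, or None if val is not in the subtree
        if val == self.val:
            return self
        if val < self.val and self.left is not None:
            return self.left.search(val)
        if val > self.val and self.right is not None:
            return self.right.search(val)
        return None
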
def insert(self, key):
    if self.root is None:
        self.root = Node(key)
    else:
        self.add_node(self.root, key)
    self.count += 1

def __init__(self, level, threshold, num_label, file):
    self.__level = level
    self.__threshold = threshold
    data = xlrd.open_workbook(file)
    sheet = data.sheets()
    # read the file into the input table
    for col in range(0, sheet[0].ncols - num_label):
        att = []
        for row in range(0, sheet[0].nrows):
            att.append(sheet[0].cell(row, col).value)
        self.__table.append(att)
    # read the file into the label table
    for col in range(sheet[0].ncols - num_label, sheet[0].ncols):
        att = []
        for row in range(0, sheet[0].nrows):
            att.append(sheet[0].cell(row, col).value)
        self.__label_table.append(att)
    for att in range(0, 1):
        self.__label.clear()
        t = copy.copy(self.__table)
        t.append(self.__label_table[att])
        self.__label = self.__classifiedAtt__(len(t) - 1, t)
        root = Node(None, None, None)
        self.__generateTree__(0, root, t)
        self.__tree.append(root)
        print("tree is created " + str(att))
    return

def main():
    data = csvParser.parse(sys.argv[1])
    attributes = list(data[0].keys())
    root = Node('Root', None)
    build(data, attributes, root, 0.01)
    print(root.name)
    for child in root.children:
        print(child.name)

def main():
    # CHANGED TO READ FROM SPECIFIC FILE NOT ARGV[1]
    data = csvParser.parse("trunk/tree03-100-words.csv")
    attributes = list(data[0].keys())
    root = Node('Root', None)
    build(data, attributes, root, 0.01)
    s = etree.tostring(outputXML(root), pretty_print=True, encoding='unicode')
    print(s)

def add_to_tree(self, node, key):
    if self.root is None:
        self.root = Node(key)
        self.count += 1
    else:
        if node.key == key:
            # duplicate key: nothing to insert
            return
        if node.key > key:
            if node.left is None:
                node.left = Node(key)
                self.count += 1
            else:
                self.add_to_tree(node.left, key)
        else:
            if node.right is None:
                node.right = Node(key)
                self.count += 1
            else:
                self.add_to_tree(node.right, key)

def from_json(self, filename):
    if self.trained:
        print("This tree has already been trained. "
              "This procedure will purge the trained rules.")
        self.__untrain()
    with open(filename, "r") as jsonfile:
        tree_dict = json.load(jsonfile)
    # load the basics
    try:
        self.depth_limit = tree_dict["depth"]
        self.dimensions = tree_dict["dimensions"]
        levels = tree_dict["levels"]
    except KeyError:
        print("Invalid JSON format")
        return False
    # first pass: create Node instances so they can be referenced as outcomes/parents
    for i in range(len(levels)):
        key = "lvl%d" % i
        level = levels[key]
        if len(level) > 0:
            self.nodes.append([])
            for r in range(len(level)):
                n = Node(level=i)
                self.nodes[i].extend([n])
    # second pass: fill in splits, outcomes and parent links
    for key, level in levels.items():
        i = int(key[-1])
        if len(level) == 0:
            continue
        for n, node in enumerate(level):
            mynode = self.nodes[i][n]
            mynode.entropy = node["entropy"]
            if node["terminal"]:
                mynode.outcome = [node["outcome"]]
            else:
                mynode.feature = node["feature"]
                mynode.threshold = node["threshold"]
                left_outcome = self.nodes[i + 1][node["outcome"][0]["index"]]
                right_outcome = self.nodes[i + 1][node["outcome"][1]["index"]]
                mynode.outcome = [left_outcome, right_outcome]
                mynode.terminal = False
            if i > 0:
                parent = self.nodes[i - 1][node["parent_index"]]
                mynode.parent = parent
    self.trained = True
    return self

class Tree:
    def __init__(self, rootVal):
        self.root = Node(rootVal)

    def addChild(self, newNode):
        kek = self.root.addChild(newNode)
        # print(kek)
        return kek

    def levelPrint(self):
        self.root.x = 0
        self.root.y = 0
        self.root.z = 0
        level_counter = 0
        self.root.printCoords()
        currentLevel = [self.root]
        nextLevel = []
        while len(currentLevel) > 0:
            level_counter += 1
            vals = []
            for child in currentLevel:
                vals += [child.getValue()]
                num_of_childs = len(child.getChildren())
                child_count = 0
                for child_1 in child.getChildren():
                    degrees = getDegrees(child_count, num_of_childs)
                    child_1.setZ(level_counter)
                    child_1.setX(child.x, degrees)
                    child_1.setY(child.y, degrees)
                    child_1.printCoords()
                    pEdeges(child, child_1)
                    child_count += 1
                    nextLevel += [child_1]
            print(vals)
            currentLevel = nextLevel
            nextLevel = []

def build(data, attributes, tree, threshold):
    if isUniform(d['Class'] for d in data):
        # All class labels are the same
        tree.setName(data[0]['Class'])
    elif len(attributes) == 0:
        # No more attributes
        tree.setName(mostFrequentCategory(data))
    else:
        bestAttribute = selectSplittingAttributeN(attributes, data, threshold)
        if not bestAttribute:
            # No best attribute to split on
            tree.setName(mostFrequentCategory(data))
        else:
            valToSplit = bestAttribute[1]  # -1 if attribute is categorical
            bestAttribute = bestAttribute[0]
            tree.setName(bestAttribute)
            if valToSplit > 0:
                # Attribute is continuous: split data on valToSplit
                splits = splitOnVal(data, bestAttribute, valToSplit)
                # Recursive call on data <= split value
                nodeLT = Node(None, "<= {}".format(valToSplit))
                tree.addChild(nodeLT)
                build(splits[0], attributes, nodeLT, threshold)
                # Recursive call on data > split value
                nodeGT = Node(None, "> {}".format(valToSplit))
                tree.addChild(nodeGT)
                build(splits[1], attributes, nodeGT, threshold)
            else:
                # Attribute is categorical
                attributeDict = groupByAttribute(data, bestAttribute)
                for attributeName in attributeDict.keys():
                    newData = attributeDict[attributeName]
                    if len(newData) > 0:
                        newAttributes = list(attributes)
                        newAttributes.remove(bestAttribute)
                        childNode = Node(None, attributeName)
                        tree.addChild(childNode)
                        build(newData, newAttributes, childNode, threshold)

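# Hedged sketch (an assumption, not the original helper): splitOnVal is used in
# the continuous branch above but not shown here. A plausible version that
# returns the "<= value" records first and the "> value" records second, matching
# how splits[0]/splits[1] are consumed above. The float() cast is an assumption
# about records parsed from CSV as strings.
def splitOnVal(data, attribute, value):
    lessOrEqual = [record for record in data if float(record[attribute]) <= value]
    greater = [record for record in data if float(record[attribute]) > value]
    return lessOrEqual, greater
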
def main():
    shrooms = r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\agaricus-lepiota.data.csv"
    iris = r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\iris.data"
    d = parsing.parseData(shrooms)
    data = d[0]
    attributes = d[1]
    root = Node('Root', None)
    dt.build(data, attributes, root, 0.1)
    print("**", classifyPointCat(root, data[1000]))
    """if not len(sys.argv) == 3:

def add(self, item):
    newNode = Node(item)
    if self.__root is None:
        self.__root = newNode
        self.__items.append(self.__root)
    else:
        treeNode = self.__items[0]
        if treeNode.getLChild() is None:
            treeNode.setLChild(newNode)
            self.__items.append(treeNode.getLChild())
        elif treeNode.getRChild() is None:
            treeNode.setRChild(newNode)
            self.__items.append(treeNode.getRChild())
            # both children of this node are now filled, so stop
            # considering it for future level-order insertions
            self.__items.pop(0)

def test():
    T = Node(17)
    T.left = Node(13)
    T.right = Node(26)
    T.left.left = Node(14)
    T.left.right = Node(5)
    print("Preorder : ")
    PreorderTraverse(T)
    print("-------")
    print("Inorder : ")
    InorderTraverse(T)
    print("-------")
    print("Postorder : ")
    PostorderTraverse(T)
    print("-------")
    print("Levelorder : ")
    LevelorderTraverse(T)

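# Hedged sketch: the traversal helpers called in test() are not included in this
# collection. Plausible versions (recursive depth-first traversals plus a
# queue-based level-order), assuming Node exposes .data/.left/.right; the
# attribute name `data` is an assumption.
from collections import deque


def PreorderTraverse(node):
    if node is None:
        return
    print(node.data, end=' ')          # root, then left, then right
    PreorderTraverse(node.left)
    PreorderTraverse(node.right)


def InorderTraverse(node):
    if node is None:
        return
    InorderTraverse(node.left)         # left, then root, then right
    print(node.data, end=' ')
    InorderTraverse(node.right)


def PostorderTraverse(node):
    if node is None:
        return
    PostorderTraverse(node.left)       # left, then right, then root
    PostorderTraverse(node.right)
    print(node.data, end=' ')


def LevelorderTraverse(root):
    if root is None:
        return
    queue = deque([root])
    while queue:
        node = queue.popleft()         # visit nodes level by level
        print(node.data, end=' ')
        if node.left is not None:
            queue.append(node.left)
        if node.right is not None:
            queue.append(node.right)
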
def main():
    t11 = Node(1)
    t12 = Node(3)
    t13 = Node(2)
    t14 = Node(4)
    t15 = Node(5)
    t16 = Node(6)
    list1 = []
    list1.append(t12)
    list1.append(t13)
    list1.append(t14)
    list2 = []
    list2.append(t15)
    list2.append(t16)
    t11.children = list1
    t12.children = list2
    solution = Solution()
    output = solution.postorder(t11)
    print("Postorder traversal: ", output)

def _Decision_Tree(self, tree_node):
    x_data = tree_node.x_data
    y_data = tree_node.y_data
    feature_names = list(x_data.columns)
    xy_data = pd.concat([x_data, y_data], axis=1)
    label_name = list(xy_data.columns)[-1]
    label_unique = xy_data[label_name].unique()
    # all samples share one label: make this node a leaf
    if len(label_unique) == 1:
        cate = y_data.iloc[0]
        tree_node.category = cate
        return tree_node
    # no features left to split on: predict the majority label
    if len(feature_names) == 0:
        cate = y_data.value_counts(ascending=False).keys()[0]
        tree_node.category = cate
        return tree_node
    label_entr = self.label_entr(y_data)
    max_gain = 0
    for feature in feature_names:
        info_gain = self.info_gain(label_entr, feature, label_name, xy_data)
        if info_gain > max_gain:
            max_gain = info_gain
            f_name = feature
    # best information gain too small: stop splitting
    if max_gain <= self.min_info_gain:
        cate = y_data.value_counts(ascending=False).keys()[0]
        tree_node.category = cate
        return tree_node
    tree_node.feature = f_name
    tree_node.children = dict()
    for sub_attribute in self.feature_item[tree_node.feature]:
        sub_data = xy_data[xy_data[f_name] == sub_attribute]
        sub_data_x = sub_data.drop(list(sub_data.columns)[-1], axis=1)
        sub_data_x.drop(tree_node.feature, axis=1, inplace=True)
        sub_data_y = sub_data[list(sub_data.columns)[-1]]
        child_node = Node(tree_node, None, None, None, sub_data_x, sub_data_y)
        tree_node.children[sub_attribute] = self._Decision_Tree(child_node)
    return tree_node

def build(data, attributes, tree, threshold):
    if isUniform(record['Category'] for record in data):
        # All records share the same category
        tree.setName(data[0]['Category'])
    elif len(attributes) == 0:
        # No attributes left to split on
        tree.setName(mostFrequentCategory(data))
    else:
        # Select the splitting attribute
        bestAttribute = selectSplittingAttribute(data, attributes, threshold)
        if not bestAttribute:
            tree.setName(mostFrequentCategory(data))
        else:
            tree.setName(bestAttribute)
            attributeDict = groupByAttribute(data, bestAttribute)
            for attributeName in attributeDict.keys():
                newData = attributeDict[attributeName]
                if len(newData) > 0:
                    newAttributes = list(attributes)
                    newAttributes.remove(bestAttribute)
                    childNode = Node(None, attributeName)
                    tree.addChild(childNode)
                    build(newData, newAttributes, childNode, threshold)

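# Hedged sketch (an assumption, not the original helper): selectSplittingAttribute
# is used above but not shown. A C4.5-style version picks the attribute with the
# highest information gain and returns None when the best gain falls below the
# threshold, which makes the caller fall back to the most frequent category.
# The 'Category' key mirrors the build() function above; entropy() is defined here.
import math
from collections import Counter


def entropy(records):
    counts = Counter(r['Category'] for r in records)
    total = len(records)
    return -sum((c / total) * math.log2(c / total) for c in counts.values())


def selectSplittingAttribute(data, attributes, threshold):
    baseEntropy = entropy(data)
    bestAttribute, bestGain = None, 0.0
    for attribute in attributes:
        # partition the records by this attribute's values
        groups = {}
        for record in data:
            groups.setdefault(record[attribute], []).append(record)
        splitEntropy = sum(len(g) / len(data) * entropy(g) for g in groups.values())
        gain = baseEntropy - splitEntropy
        if gain > bestGain:
            bestAttribute, bestGain = attribute, gain
    # refuse to split when the best gain is below the threshold
    if bestGain < threshold:
        return None
    return bestAttribute
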
def main(): """ if not len(sys.argv) >= 2: print("\t\tMissing arguments\n\tProper Call :\tpython C45.py <CSVFile> [<Restrictions>]") return dataFile = sys.argv[1] d = parsing.parseData(dataFile) data = d[0] attributes = d[1] if len(sys.argv) == 3: restrFile = sys.argv[2] with open(restrFile, 'r') as file: restr = file.read().split(',') attributes = restrictAttrib(attributes[:-1], restr[1:]) """ # This is all just print testing bs shrooms = r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\agaricus-lepiota.data.csv" iris = r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\iris.data" letters = r"E:\Documents\CSC466\Lab 3\466-lab-3.git\trunk\letter-recognition.data.csv" data, attributes = parsing.parseData(letters) print(attributes) print("Number of records : {}\nWith {} different attributes".format( len(data), len(attributes))) #s = selectSplittingAttributeN(attributes, data, 0.01) #en = entropyBinSplit(data, s[0], s[1]) #print(en) root = Node('Root', None) build(data, attributes, root, 0.3) xmlOutput = etree.tostring(outputXML(root), pretty_print=True, encoding='unicode') print(xmlOutput)
def BranchAndBoundAlgorithm():
    queue = deque()
    # sort items by value density, best first
    sortedItemList = [(index, Density(item)) for index, item in enumerate(ITEMS)]
    sortedItemList = sorted(sortedItemList, key=lambda x: x[1], reverse=True)
    bestNode = Node(0, 0.0, 0.0, 0.0, [])
    root = Node(0, 0.0, 0.0, getUpperBoundValue(bestNode, sortedItemList), [])
    queue.appendleft(root)
    while len(queue) > 0:
        currentNode = queue.pop()
        # only branch while the bound is promising and items remain to decide on
        if currentNode.bound > bestNode.value and currentNode.treeLevel < len(ITEMS):
            index = sortedItemList[currentNode.treeLevel][0]
            nextCheckedItemValue = ITEMS[index].value
            nextCheckedItemWeight = ITEMS[index].weight
            # branch 1: take the next item
            nextAddednode = Node(currentNode.treeLevel + 1,
                                 currentNode.value + nextCheckedItemValue,
                                 currentNode.weight + nextCheckedItemWeight,
                                 currentNode.bound,
                                 currentNode.selectedItems + [index])
            if nextAddednode.weight <= KNAPSACK_SIZE:
                if nextAddednode.value > bestNode.value:
                    bestNode = nextAddednode
                if nextAddednode.bound > bestNode.value:
                    queue.appendleft(nextAddednode)
            # branch 2: skip the next item and recompute the upper bound
            nextNotAddedNode = Node(currentNode.treeLevel + 1,
                                    currentNode.value,
                                    currentNode.weight,
                                    currentNode.bound,
                                    currentNode.selectedItems)
            nextNotAddedNode.bound = getUpperBoundValue(nextNotAddedNode, sortedItemList)
            if nextNotAddedNode.bound > bestNode.value:
                queue.appendleft(nextNotAddedNode)
    bestSolution = [0] * len(ITEMS)
    for itemBit in bestNode.selectedItems:
        bestSolution[itemBit] = 1
    return bestSolution, int(bestNode.value)

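# Hedged sketch (an assumption, not the original code): getUpperBoundValue is the
# usual fractional-knapsack relaxation. Starting from the node's current value and
# weight, it greedily adds the remaining items in density order and takes a
# fraction of the first item that no longer fits. ITEMS, KNAPSACK_SIZE and the
# Node fields are taken from the function above.
def getUpperBoundValue(node, sortedItemList):
    bound = node.value
    remaining = KNAPSACK_SIZE - node.weight
    for level in range(node.treeLevel, len(sortedItemList)):
        index = sortedItemList[level][0]
        item = ITEMS[index]
        if item.weight <= remaining:
            # the whole item still fits
            bound += item.value
            remaining -= item.weight
        else:
            # take only the fraction that fits, then stop
            bound += item.value * (remaining / item.weight)
            break
    return bound
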
def levels(root):
    # an empty subtree has no levels
    if root is None:
        return 0
    # a leaf node is at level 1
    if root.left is None and root.right is None:
        return 1
    # recursively compute the levels of the left and right subtrees
    left_subtree_levels = levels(root.left)
    right_subtree_levels = levels(root.right)
    # the overall number of levels is the deeper subtree plus this node
    total_levels = max(left_subtree_levels, right_subtree_levels) + 1
    return total_levels


a = Node(1)
b = Node(2)
c = Node(3)
d = Node(4)
e = Node(5)
f = Node(6)
g = Node(7)
h = Node(8)

a.left = b
a.right = c
b.left = d
b.right = e
c.left = f
d.left = g
g.left = h

result = levels(a)
print(result)

def createNode(self, val):
    node = Node()
    node.setVal(val)
    return node

def __algorithm(self, S, labels, level=0, par_node=None, left=False, terminal_flag=False):
    # calculate the initial entropy
    null_entropy = self.__impurity(labels)
    # check whether everyone is in the same class
    if null_entropy <= 0. or level >= self.depth_limit or terminal_flag:
        # terminate: everyone has been classified or the maximum depth has been reached
        final_node = Node(parent=par_node, level=level, entropy=null_entropy)
        final_node.outcome[0] = self.__bestguess(labels)
        self.nodes[level].extend([final_node])
        return final_node
    else:
        # go over all the features in this dataset
        features = list(range(S.shape[1]))
        min_entropy = np.inf
        best_split = [0, 0]  # holds the feature number and threshold value for the best split
        for f in features:
            # try all possible splits along this feature and keep the best (lowest) entropy
            Sfeat = S[:, f]
            split, entropy = self.__bestsplit(Sfeat, labels)
            if entropy < min_entropy:
                min_entropy = entropy
                best_split = [f, split]
        new_node = Node(feature=best_split[0], threshold=best_split[1],
                        parent=par_node, level=level, entropy=min_entropy)
        self.nodes[level].extend([new_node])
        # split the dataset; check whether S is a vector
        if len(S.shape) == 1:
            # S is a one-feature vector
            S = S.reshape((len(S), 1))
        leftMask = S[:, best_split[0]] <= best_split[1]
        rightMask = S[:, best_split[0]] > best_split[1]
        features.remove(best_split[0])
        leftLabels = labels[leftMask]
        rightLabels = labels[rightMask]
        # terminate here when the split puts all samples into the left or right branch
        if leftMask.all():
            new_node.make_terminal(self.__bestguess(leftLabels))
            return new_node
        if rightMask.all():
            new_node.make_terminal(self.__bestguess(rightLabels))
            return new_node
        if len(features) == 0:
            leftS = S[leftMask, :]
            rightS = S[rightMask, :]
            terminal_flag = True
        else:
            leftS = S[leftMask, :][:, features]
            rightS = S[rightMask, :][:, features]
        # terminate here when one of the branches is empty
        if len(leftS) == 0 or leftS.shape[1] == 0:
            new_node.make_terminal(self.__bestguess(rightLabels))
            return new_node
        if len(rightS) == 0 or rightS.shape[1] == 0:
            new_node.make_terminal(self.__bestguess(leftLabels))
            return new_node
        # check whether a level below this one already exists
        try:
            self.nodes[level + 1]
        except IndexError:
            self.nodes.append([])
        # recursively call self on the two child nodes
        new_node.outcome[0] = self.__algorithm(leftS, leftLabels, level=level + 1,
                                               par_node=new_node, terminal_flag=terminal_flag)
        new_node.outcome[1] = self.__algorithm(rightS, rightLabels, level=level + 1,
                                               par_node=new_node, terminal_flag=terminal_flag)
        return new_node

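# Hedged sketch (an assumption, not the original helper): __bestsplit is used
# above to find the threshold along one feature that minimizes the weighted
# impurity of the two resulting branches. A common approach tries the midpoints
# between consecutive sorted unique feature values; __impurity is assumed to be
# an entropy-like function of a label vector, as in the method above.
import numpy as np


def __bestsplit(self, Sfeat, labels):
    values = np.unique(Sfeat)
    best_threshold = values[0]
    best_entropy = np.inf
    for threshold in (values[:-1] + values[1:]) / 2.0:  # midpoints between unique values
        leftMask = Sfeat <= threshold
        rightMask = ~leftMask
        n_left, n_right = leftMask.sum(), rightMask.sum()
        # weighted average impurity of the two branches
        entropy = (n_left * self.__impurity(labels[leftMask]) +
                   n_right * self.__impurity(labels[rightMask])) / len(labels)
        if entropy < best_entropy:
            best_entropy = entropy
            best_threshold = threshold
    return best_threshold, best_entropy
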
def insert(self, val):
    if self.root:
        self._insert(val, self.root)
    else:
        self.root = Node(value=val)
    self.size += 1

def non_leaf(root):
    # an empty subtree contributes no non-leaf nodes
    if root is None:
        return 0
    # a leaf node is not counted
    if root.left is None and root.right is None:
        return 0
    # recursively count the non-leaf (intermediate) nodes in the left and right subtrees
    left_subtree_non_leaf = non_leaf(root.left)
    right_subtree_non_leaf = non_leaf(root.right)
    # total: the current intermediate node plus the counts from both subtrees
    total_non_leaf = left_subtree_non_leaf + right_subtree_non_leaf + 1
    return total_non_leaf


a = Node(1)
b = Node(2)
c = Node(3)
d = Node(4)
e = Node(5)
f = Node(6)
g = Node(7)

a.left = b
a.right = c
b.left = d
b.right = e
c.left = f
c.right = g

result = non_leaf(a)
print(result)

def __init__(self, min_info_gain, x_data, y_data):
    self.min_info_gain = min_info_gain
    self.root_node = Node(x_data=x_data, y_data=y_data)
    self.feature_item = self.get_feature(x_data)

def LCA(self, a, b, node):
    # (method header, base cases and the left-subtree recursion are reconstructed
    #  from the standard LCA pattern; the original snippet begins at the
    #  right-subtree recursion below)
    if node is None:
        return None
    if node.data == a or node.data == b:
        return node
    l = self.LCA(a, b, node.left)
    r = self.LCA(a, b, node.right)
    if l and r:
        print(node.data)
        return node
    if l:
        return l
    if r:
        return r


if __name__ == '__main__':
    t = BinaryTree()
    t.add(Node(5))
    t.add(Node(2))
    t.add(Node(1))
    t.add(Node(3))
    t.add(Node(4))
    t.add(Node(7))
    t.add(Node(6))
    t.add(Node(8))
    t.preOrder(t.root)
    print('\n=======')
    t.preOrderIt()
    print('\n=======')
    t.inOrder(t.root)
    print('\n=======')
    t.postOrder(t.root)
    print('\n=======')

def __init__(self, rootVal):
    self.root = Node(rootVal)

def is_bst(root):
    # (function header and the leading branches for a leaf node and a node with
    #  both children are reconstructed; the original snippet begins inside the
    #  left-child-only case below)
    if root is None or (root.left is None and root.right is None):
        # empty tree or leaf node
        return True
    # node with both children
    elif root.left is not None and root.right is not None:
        if root.left.data <= root.data <= root.right.data:
            return is_bst(root.left) and is_bst(root.right)
        return False
    # intermediate node with a left child only
    elif root.left is not None:
        if root.left.data <= root.data:
            return is_bst(root.left)
        else:
            return False
    # intermediate node with a right child only
    elif root.right is not None:
        if root.data <= root.right.data:
            return is_bst(root.right)
        else:
            return False
    # return False


n1 = Node(1)
n2 = Node(2)
n3 = Node(3)
n4 = Node(4)
n5 = Node(5)
n6 = Node(6)
n7 = Node(7)

n4.left = n2
n4.right = n6
n2.left = n1
n2.right = n3
n6.left = n5
n6.right = n7

result = is_bst(n4)