def runTree(self, featureValues: typ.Dict[int, float], classId: int,
            terminals: typ.Dict[int, typ.List[int]]) -> float:
    """
    Evaluate the tree on one instance by handing the root node to __runNode.

    Any exception raised during the walk is reported (message, then the tree
    itself is printed) and the process exits with status -1.

    :param featureValues: The dictionary mapping feature ids to their values (in the current instance).
    :param classId: The class the tree is meant to identify (this is used to find the terminal values).
    :param terminals: The dictionary that maps class ids to their relevant features.

    :type featureValues: dict
    :type classId: int
    :type terminals: dict

    :returns: The final value that the decision tree creates given the provided data.
    :rtype: float
    """
    try:
        # start the recursive walk at the root of this tree
        return self.__runNode(featureValues, self._root, classId, terminals)
    except Exception as failure:
        failedLine = sys.exc_info()[-1].tb_lineno  # line the exception was raised on
        printError(f'runTree found an error on line {failedLine}: {str(failure)}')
        print('')
        print(self)   # dump the tree to help debugging
        sys.exit(-1)  # exit on error; recovery not possible
def checkForDuplicateKeys(self, otherTree: "Tree"):
    """
    Verify that this tree and otherTree do not share any node keys.

    When no key is shared the method simply returns. When at least one key
    appears in both trees, a DuplicateNodeError is raised and handled locally:
    the error is logged, diagnostics for every shared key are printed, and the
    process exits with status -1.

    :param otherTree: The tree whose node keys are compared against this tree's.
    """
    ownNodes = self._nodes
    otherNodes = otherTree._nodes

    # every key of the other tree that is also a key of this tree
    duplicates = [key for key in otherNodes if key in ownNodes]

    if not duplicates:  # nothing is shared, so there is nothing to report
        return

    try:
        raise DuplicateNodeError(keyList=duplicates)
    except DuplicateNodeError as err:
        lineNm = sys.exc_info()[-1].tb_lineno                            # get the line number of error
        log.error(f'line = {lineNm}, {str(err)}')                        # log the error
        printError(''.join(traceback.format_stack()))                    # print stack trace
        printError(f'On line {lineNm} DuplicateNodeError encountered')   # print message

        print('Duplicate(s):')
        pprint.pprint(duplicates)           # print the list of duplicate nodes
        print('Subtree:')
        pprint.pprint(list(otherNodes.keys()))

        for k in duplicates:                # report both copies of every shared key
            mine = ownNodes.get(k)
            theirs = otherNodes.get(k)
            same = mine is theirs
            print(f'For Key: {k}')
            print(f'Do they share a memory address? {same}')
            print(f'Data in Original Node: {mine.data}')
            print(f'Data in Subtree Node: {theirs.data}')
            print(f'Children of Original Node: {mine.children}')
            print(f'Children of Subtree Node: {theirs.children}\n')

        sys.exit(-1)  # exit on error; recovery not possible
def generateNewIDs(self):
    """
    Give the tree a fresh UUID and regenerate the IDs of its nodes.

    The node IDs are regenerated by _generateNewNodeIDs, starting from the
    root. Afterwards the root's ID must be a key of the node dictionary; if it
    is not, the problem is printed and logged and the process exits with -1.
    """
    # * Update the Tree's ID * #
    self._ID = str(uuid.uuid4())

    # * Update the IDs of the Nodes * #
    self._generateNewNodeIDs(self.root.ID)

    try:
        # the root must still be reachable through the node dictionary
        if self.root.ID not in self._nodes:
            raise ValueError
    except ValueError:
        printError('Error: GenerateNewIDs corrupted root')
        log.error('Error: GenerateNewIDs corrupted root')

        printError(''.join(traceback.format_stack()))  # print stack trace
        log.error(''.join(traceback.format_stack()))

        print(f'Root Node: {self.root}')
        log.error(f'Root Node: {self.root}')

        print('Tree:')
        log.error('Tree:')
        print(self.__print_tree_simple())  # print tree
        log.error(self.__print_tree_simple())

        sys.exit(-1)
def test_main():
    """
    Build both fixture trees and run the ID-generation check on the first one.

    A KeyError raised anywhere in that sequence is reported together with a
    stack trace, after which the process exits with status -1.
    """
    try:
        # * Create the Test Trees * #
        test_tree1 = create_tree1()  # create tree 1
        test_tree2 = create_tree2()  # create tree 2

        # Currently disabled checks:
        # check_rDelete(test_tree1)             # * Test __rDelete * #
        # check_remove_from_tree(test_tree1)    # * Test removeFromTree * #
        # check_cross(test_tree1, test_tree2)   # * Test Crossover * #
        # test_search(test_tree1)               # * Test __rSearch * #

        test_id_gen(test_tree1)

    except KeyError as err:
        lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error

        # stack trace first, then where the error happened, then the error text
        stack: str = ''.join(traceback.format_stack())
        message: str = (
            stack
            + f'\nKeyError encountered on line {lineNm} in TreeTest.py'
            + f'\n{str(err)}'
        )
        printError(message)  # print message

        print('Tree 1')
        # print(test_tree1)
        print('\nTree2')
        # print(test_tree2)

        sys.exit(-1)  # exit on error; recovery not possible
def __str__(self):
    """
    Render the tree as text.

    The output of __print_tree is preferred; if that raises any exception the
    failure is reported and the result of __print_tree_simple is returned
    instead.
    """
    try:
        # call recursive print starting with root
        return self.__print_tree()
    except Exception as err:
        # the nicer print failed, so report it and fall back to the simple one
        printError(f'Encountered an error while printing tree: {str(err)}')
        printError('Switching to simple print...')
        return self.__print_tree_simple()
def getRandomNode(self) -> str:
    """
    Pick the ID of a random node other than the root (leaves are allowed).

    If the root's ID is not among the tree's node keys, the error is reported
    with a stack trace and a simple print of the tree, and the process exits
    with status -1.

    :returns: The ID of the randomly chosen node.
    :rtype: str
    """
    try:
        candidates: typ.List[str] = list(self._nodes)  # every node ID in the tree
        candidates.remove(self._root.ID)               # the root may not be chosen
    except ValueError as err:
        lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error
        printError(f'GetRandomNode encountered an error on line {lineNm}: {str(err)}')
        printError(''.join(traceback.format_stack()))  # print stack trace
        print(self.__print_tree_simple())              # print tree
        sys.exit(-1)

    return random.choice(candidates)
def print_tree(tree: "Tree", nodeID: str, indent: str, isLast: bool):
    """
    Recursively print the subtree rooted at nodeID using box-drawing characters.

    The root is printed without a connector, the last child of a node is
    printed with a corner connector, and every other child with a tee
    connector. Children are visited in the order left, middle, right, and the
    right child is always treated as the last child.

    Each child is only visited when that child's own attribute is not None.
    (Previously the middle and right children were guarded by a check on
    node.left, so they were skipped whenever the left child was missing.)

    :param tree: The tree that owns the node; used for node lookup and to recognise the root.
    :param nodeID: The ID of the node to print; None prints nothing.
    :param indent: The prefix printed before this node's line.
    :param isLast: True if this node is the last child of its parent.
    """
    if nodeID is None:  # nothing to print for a missing child
        return

    try:
        node: Node = tree.getNode(nodeID)
    except NotInTreeError:
        lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error
        message: str = ''.join(traceback.format_stack())  # traceback to message
        message += f'\nNotInTreeError encountered on line {lineNm} in TreeTest.py'
        message += f'\nKey = {nodeID}\n{tree}'  # print the tree & node
        printError(message)  # print message
        sys.exit(-1)  # exit on error; recovery not possible

    isLeaf: bool = node.isLeaf()

    if nodeID == tree.root.ID:  # if this is the root of a tree
        print(f'{indent}{str(node)}')  # print this node
        indent += " "
        print(f"{indent}\u2503")

    elif isLast:  # if this is the last child of a node
        print(f'{indent}\u2517\u2501\u2501{str(node)}')  # print this node
        indent += " "
        if isLeaf:  # if it is a leaf, don't print the extra bar
            print(f"{indent}")
        else:
            print(f"{indent}\u2503")

    else:  # if this is not the last child
        print(f'{indent}\u2523\u2501\u2501{str(node)}')  # print this node
        indent += "\u2503 "
        if isLeaf:  # if it is a leaf, don't print the extra bar
            print(f"{indent}")
        else:
            print(f"{indent}\u2503")

    # visit the children in order; only the right child counts as "last"
    if node.left is not None:
        print_tree(tree, node.left, indent, False)
    if node.middle is not None:
        print_tree(tree, node.middle, indent, False)
    if node.right is not None:
        print_tree(tree, node.right, indent, True)

    return
def removeSubtree(self, newRootID: str) -> ("Tree", str, str):
    """
    Cut the subtree rooted at newRootID out of this tree and return it as a new Tree.

    The nodes of the subtree are moved (not copied) into the new Tree, which is
    given this tree's ID so that a later attempt to add it back to the same
    tree can be detected.

    :param newRootID: The ID of the node that becomes the root of the removed subtree.

    :returns: The new subtree, the ID of the node that was its parent, and the
              branch of that parent it was attached to.

    :raises MissingNodeError: If the node stores None as its parent.

    If newRootID is not found in the tree, the error is printed together with
    the tree and a stack trace, and the process exits with status -1.
    """
    # NOTE: removeSubtree has been tested & works

    subtreeRoot = self._nodes.get(newRootID)  # the node that will head the subtree

    if not subtreeRoot:  # if the key is bad, raise an error
        try:
            raise NotInTreeError(newRootID)
        except NotInTreeError:
            lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error
            printError(f'NotInTreeError encountered by removeSubtree on line {lineNm} of Tree.py')
            printError(f'Node with ID {newRootID} could not be found in tree by removeSubtree')
            print(self)  # print the tree
            print('\n')
            printError(''.join(traceback.format_stack()))  # print stack trace
            sys.exit(-1)  # exit on error; recovery not possible

    # remember where the subtree was attached before it is detached
    parentOfSubtreeID: str = subtreeRoot.parent
    orphanBranch: str = subtreeRoot.branch

    if orphanBranch is None:
        printError(f'Found None branch on Node with ID {newRootID}')
        print(self)

    if parentOfSubtreeID is None:  # see if the parent is None
        printError('Parent Stored in Node was stored as None')
        raise MissingNodeError(role='Parent', ID=newRootID)

    # *** Move Everything Below (and Including) the New Root into copyDictionary *** #
    self._copyDictionary = {}  # make sure the copy dictionary is empty
    self.__rDelete(newRootID, rootID=newRootID, makeCopy=True)  # copy the subtree & delete it from original

    # *** Build a new Subtree Using the Copy *** #
    # NOTE: we set ID to self so we can check that it isn't added back to the same tree
    subtree: Tree = Tree(root=subtreeRoot, nodes=self._copyDictionary, ID=self.ID)

    self._copyDictionary = {}  # empty copyDictionary

    return subtree, parentOfSubtreeID, orphanBranch
def addSubtree(self, subtree: "Tree", newParent: str, orphanBranch: str):
    """
    Graft subtree onto this tree as the orphanBranch child of the node newParent.

    The parent's branch is pointed at the subtree's root, the subtree's root is
    updated to record its new parent and branch (and is no longer flagged as a
    root), and all of the subtree's nodes are merged into this tree's node
    dictionary.

    :param subtree: The tree to attach.
    :param newParent: The ID of the node in this tree that adopts the subtree.
    :param orphanBranch: Which child slot to use: 'left', 'middle' or 'right'.

    :raises MissingNodeError: If newParent is not a node of this tree.
    :raises AssertionError: If subtree has the same ID as this tree.
    :raises InvalidBranchError: If orphanBranch is not one of the three branch names.
    """
    # *** Error Checking *** #
    if newParent is None:
        print('addSubtree was given a None (root?) newParent')

    adopter = self._nodes.get(newParent)
    if adopter is None:  # the parent id is not valid
        raise MissingNodeError(msg='addSubtree could not find it\'s new parent')

    # a subtree keeps the ID of the tree it was cut from, so equal IDs mean
    # it is being added back onto its original tree
    if self.ID == subtree.ID:
        printError('AddSubtree attempted to add itself back to it\'s original tree')
        raise AssertionError

    if orphanBranch not in ('left', 'right', 'middle'):
        message: str = f"addSubtree encountered an invalid branch\nParent: {newParent}\nBranch: {orphanBranch}\n Subtree:\n{subtree}"
        raise InvalidBranchError(message)
    # *** End of Error Checking *** #

    graftedRoot = subtree._root

    # set the adopted parent to point to the subtree
    setattr(self._nodes[newParent], orphanBranch, graftedRoot.ID)

    # set the subtree root to point back at its new parent & branch
    graftedRoot.parent = newParent
    graftedRoot.branch = orphanBranch
    graftedRoot.isRoot = False  # this is no longer a root Node

    # check for duplicate nodes
    # self.checkForDuplicateKeys(subtree)

    # it is now safe to add subtree to dictionary of nodes
    self._nodes.update(subtree._nodes)

    return
def __runNode(self, featureValues: typ.Dict[int, float], node: Node, classId: int,
              terminals: typ.Dict[int, typ.List[int]]) -> typ.Union[int, float]:
    """
    __runNode is used to transform provided data by walking the decision tree.

    An operator node is evaluated from its children:
      * 'add', 'subtract', 'times', 'max' combine the values of the left and
        right children;
      * 'if' evaluates the left child and returns the value of the right child
        when that value is >= 0, otherwise the value of the middle child.
    A terminal node returns the feature value stored under node.data.

    Any failure is reported and ends the process with status -1. This includes
    a node whose data is in OPS but has no branch here; previously such a node
    fell through and silently returned None.

    :param featureValues: The dictionary mapping feature ids to their values (in the current instance).
    :param node: The node being examined (this is used during recursion).
    :param classId: The class the tree is meant to identify (this is used to find the terminal values).
    :param terminals: The dictionary that maps class ids to their relevant features.

    :type featureValues: dict
    :type node: Node
    :type classId: int
    :type terminals: dict

    :returns: The transformed value.
    :rtype: float
    """
    try:
        if node.data in OPS:  # if the node is an OP

            # ************ Determine Which OP is Stored & Run Recursion ************ #
            left: Node = self.getLeft(node.ID)    # get the left child (all OPS will have a left)
            right: Node = self.getRight(node.ID)  # get the right child (all OPS will have a right)

            if node.data == 'add':  # left + right
                return (self.__runNode(featureValues, left, classId, terminals) +
                        self.__runNode(featureValues, right, classId, terminals))

            elif node.data == 'subtract':  # left - right
                return (self.__runNode(featureValues, left, classId, terminals) -
                        self.__runNode(featureValues, right, classId, terminals))

            elif node.data == 'times':  # left * right
                return (self.__runNode(featureValues, left, classId, terminals) *
                        self.__runNode(featureValues, right, classId, terminals))

            elif node.data == 'max':  # max(left, right)
                return max(self.__runNode(featureValues, left, classId, terminals),
                           self.__runNode(featureValues, right, classId, terminals))

            elif node.data == 'if':
                # a non-negative left value selects the right child
                if self.__runNode(featureValues, left, classId, terminals) >= 0:
                    return self.__runNode(featureValues, right, classId, terminals)
                # a negative left value selects the middle child
                middle: Node = self.getMiddle(node.ID)
                return self.__runNode(featureValues, middle, classId, terminals)

            else:
                # an OP without a branch above must not silently evaluate to None
                raise TypeError(f'runNode found an OP it cannot evaluate, data ={node.data}')
            # ********************************************************************* #

        elif node.data in terminals[classId]:  # if the node is a terminal
            # ************************ Return Terminal Value ************************ #
            return featureValues[node.data]  # if the terminal is valid, return it
            # *********************************************************************** #

        else:  # if the node is not a terminal or a OP
            raise TypeError(f'runNode could not parse data in tree, data ={node.data}')

    except (TypeError, AssertionError) as err:  # report the error with a stack trace
        lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error
        log.error(f'line = {lineNm}, {str(err)}')  # log the error
        printError(f'line = {lineNm}, {str(err)}')  # print message
        printError(''.join(traceback.format_stack()))  # print stack trace
        sys.exit(-1)  # exit on error; recovery not possible

    except Exception as err:  # anything else: report it and print the tree
        lineNm = sys.exc_info()[-1].tb_lineno
        printError(f'runNode found an error on line {lineNm}: {str(err)}')
        print('')
        print(self)
        sys.exit(-1)
def __rDelete(self, currentID: str, rootID: str, makeCopy: bool = False):
    """
    This will delete a subtree from the original tree (storing it in copyDictionary if requested).
    NOTE: __rDelete has been tested & works as expected

    The subtree is walked depth first (left, then right, then middle); each
    node is removed from self._nodes on the way back up, after its children
    have been handled. When currentID is the subtree's root, the parent's
    pointer to it and the root's own branch are cleared first.

    :param currentID: The ID of the node currently being deleted (None or an unknown ID is ignored).
    :param rootID: The ID of the root of the subtree being deleted.
    :param makeCopy: If True, every deleted node is stored in self._copyDictionary under its ID.
    """

    current: Node = self._nodes.get(currentID)  # get the current Node

    if current is None:  # if get failed
        return

    # * If This is the Subtree's Root, Deal with Parents Still in Tree * #
    if currentID == rootID:  # if we are looking at the root of the subtree

        branch: str = current.branch  # get what branch of parent current is on
        parentID: str = current.parent  # get the parents ID
        parent: Node = self._nodes.get(parentID)  # get the parent Node

        # if parent IS None then this is root so don't mess with parents
        if parent is not None:
            current.branch = None  # * Root is Not on a Branch so Set to Null * #

            # * Deal with Parent's Left/Right/Middle Value * #
            if branch == 'left':
                parent.left = None
            elif branch == 'right':
                parent.right = None
            elif branch == 'middle':
                parent.middle = None

    if self._nodes.get(currentID):  # if the current node is in the tree

        # *** Recursion *** #
        # delete the left node
        self.__rDelete(self._nodes[currentID].left, rootID, makeCopy)
        # delete the right node
        self.__rDelete(self._nodes[currentID].right, rootID, makeCopy)
        # delete the middle node
        self.__rDelete(self._nodes[currentID].middle, rootID, makeCopy)
        # *** End of Recursion *** #

        # after we have reached the leaves of the tree, return up
        # the stack, deleting/copying as we go

        # *** Copy *** #
        if makeCopy:  # if we are creating a subtree
            # this should not raise a key error because of the earlier IF statement
            # NOTE: don't use copy as that will generate new nodes & change node IDs
            self._copyDictionary[currentID] = self._nodes[currentID]
        # *** End of Copy *** #

        # *** Delete Current Node from Original Tree *** #
        del self._nodes[currentID]

    # if we have hit the bottom of the tree, or node didn't have child
    # NOTE(review): the early return above already covers a lookup that yields
    # None, so this branch looks reachable only if None is itself a key whose
    # node is falsy — confirm
    elif currentID is None:
        return

    # NOTE(review): for the same reason this branch appears to run only for a
    # stored node that evaluates as falsy, not for a missing key — confirm
    else:  # if the node is not in the tree, raise an error
        try:
            raise NotInTreeError(currentID)
        except NotInTreeError:
            lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error
            printError(f'NotInTreeError encountered on line {lineNm} of Tree.py')
            printError(f'Node with ID {currentID} could not be found in tree by rDelete()')
            print(self)  # print the tree
            print('\n')
            printError(''.join(traceback.format_stack()))  # print stack trace
            sys.exit(-1)  # exit on error; recovery not possible
def getDepth(self, targetID: str, currentID: str, depth=0) -> int:
    """
    Count how many parent links separate the node currentID from the root.

    The method climbs from currentID to the root one parent at a time, adding
    one to depth for every step, and returns depth once the root is reached.

    :param targetID: Not read by this method; it is only passed along to the recursive call.
    :param currentID: The ID of the node whose depth is being measured.
    :param depth: The number of steps already taken (callers normally leave this at 0).

    :returns: depth plus the number of steps from currentID up to the root.
    :rtype: int

    :raises NotInTreeError: If the parent recorded on a node is not in the tree.

    If currentID is None, is not a key of the tree, or maps to a None node, the
    problem is logged and printed together with the tree and a stack trace, and
    the process exits with status -1.
    """

    def abort(msg: str):
        """ Report an unrecoverable lookup failure and exit. """
        log.error(msg)
        printError(msg)
        printError(f'ID is of Type: {type(currentID)}')
        print(f"\n{self}")
        printError(''.join(traceback.format_stack()))  # print stack trace
        sys.exit(-1)  # exit on error; recovery not possible

    # * If the Current Node is Root Return Depth * #
    if currentID == self.root.ID:
        return depth

    # * Get the Current Node * #
    try:
        if currentID is None:  # if the ID is None
            raise AssertionError('getDepth was given a currentID of None')

        current: Node = self._nodes[currentID]  # this might raise a key error

        if current is None:  # if the Node could not be indexed
            raise NullNodeError(currentID)

    except AssertionError:
        lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error
        abort(f"getDepth was passed a currentID of None: line {lineNm}")

    except KeyError:
        lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error
        abort(f"getDepth was passed a currentID not in the Tree: line {lineNm}\nID: {currentID}")

    # NullNodeError is what the block above raises for a None node;
    # NotInTreeError is still caught so nothing handled before escapes now
    except (NullNodeError, NotInTreeError):
        lineNm = sys.exc_info()[-1].tb_lineno  # get the line number of error
        abort(f'getDepth found that the Node with ID {currentID} was None: line {lineNm}')

    # * Get the Parent of the Current Node * #
    parent = self._nodes.get(current.parent)

    if parent is None:  # if the Node could not be indexed
        raise NotInTreeError(f'getDepth could not find parent of Node with ID {currentID}')

    # * Step Up the Tree Increasing Depth * #
    return self.getDepth(targetID, parent.ID, depth + 1)
def create_tree1() -> Tree:
    """
    Creates a tree of a predetermined structure

    Layout (operator nodes by name, terminal nodes by value):

        add
         |-- subtract
         |     |-- max   -> 3, 5
         |     `-- times -> 12, add -> (1, 8)
         `-- max
               |-- if    -> 15, 1 (middle), 7
               `-- add   -> 4, 9

    The IDs of the ten terminal nodes are stored in the global TERMINAL_NODES1,
    and the finished tree is printed with print_init before it is returned.
    """
    global TERMINAL_NODES1

    # * Create a New Tree Object * #
    test_tree: Tree = Tree()
    test_tree.ID = 'TREE_1'

    # * Create a New Root Node & Override the Old Root in the Tree * #
    root: Node = Node(data='add')
    rootID = root.ID  # get the root ID (this will be tested later)
    test_tree.overrideRoot(root)

    # * Test RootID * #
    if test_tree.root.ID != rootID:
        # print error if there's a problem with root's ID
        printError(f'Root ID {rootID} & {test_tree.root.ID} do not match')
        rootID = test_tree.root.ID  # update rootID to avoid issues

    def attach(parentID, left, right, middle=None):
        """ Add the children under parentID, then return their IDs in left-(middle-)right order. """
        test_tree.addLeft(parentID=parentID, data=left)
        if middle is not None:
            test_tree.addMiddle(parentID=parentID, data=middle)
        test_tree.addRight(parentID=parentID, data=right)

        leftID: str = test_tree.getLeft(parentID).ID
        if middle is None:
            return leftID, test_tree.getRight(parentID).ID
        middleID: str = test_tree.getMiddle(parentID).ID
        return leftID, middleID, test_tree.getRight(parentID).ID

    # * Root is ADD: SUBTRACT on the left, MAX on the right * #
    subtractID, maxID = attach(rootID, 'subtract', 'max')

    # * Root -> Left is SUBTRACT: MAX on the left, TIMES on the right * #
    subMaxID, subTimesID = attach(subtractID, 'max', 'times')

    # * Root -> Right is MAX: IF on the left, ADD on the right * #
    maxIfID, maxAddID = attach(maxID, 'if', 'add')

    # * Root -> Left -> Left is MAX: two TERMINAL nodes * #
    leaf3, leaf5 = attach(subMaxID, 3, 5)

    # * Root -> Left -> Right is TIMES: a TERMINAL node and an ADD node * #
    leaf12, timesAddID = attach(subTimesID, 12, 'add')

    # * Root -> Left -> Right -> Right is ADD: two TERMINAL nodes * #
    leaf1, leaf8 = attach(timesAddID, 1, 8)

    # * Root -> Right -> Right is ADD: two TERMINAL nodes * #
    leaf4, leaf9 = attach(maxAddID, 4, 9)

    # * Root -> Right -> Left is IF: three TERMINAL nodes * #
    leaf15, leafMiddle1, leaf7 = attach(maxIfID, 15, 7, middle=1)

    # * Create a list of all the Terminal Node IDS (these are the tree's leaves) * #
    TERMINAL_NODES1 = [
        leaf3, leaf5,
        leaf12,
        leaf1, leaf8,
        leaf4, leaf9,
        leaf15, leafMiddle1, leaf7,
    ]

    print('Tree 1 Created:')
    print_init(test_tree)  # print the constructed tree

    return test_tree
def create_tree2() -> Tree:
    """
    Creates a second tree of a predetermined structure

    Layout (operator nodes by name, terminal nodes by value):

        add
         |-- times
         |     |-- add -> 75, 76
         |     `-- 41
         `-- if
               |-- 16
               |-- 20 (middle)
               `-- subtract -> 30, 10

    The IDs of the seven terminal nodes are stored in the global
    TERMINAL_NODES2, and the finished tree is printed with print_init before it
    is returned. The SUBTRACT node under IF has children of its own, so its ID
    is not part of TERMINAL_NODES2.
    """
    # * Create a New Tree Object * #
    test_tree: Tree = Tree()
    test_tree.ID = 'TREE_2'

    # * Create a New Root Node * #
    root: Node = Node(data='add')  # create a root node for the tree
    rootID = root.ID  # get the root ID (this will be tested later)

    # * Override the Old Root in the Tree * #
    test_tree.overrideRoot(root)

    # * Test RootID * #
    if test_tree.root.ID != rootID:
        # print error if there's a problem with root's ID
        printError(f'Root ID {rootID} & {test_tree.root.ID} do not match')
        rootID = test_tree.root.ID  # update rootID to avoid issues

    # * Root is ADD so create two children * #
    test_tree.addLeft(parentID=rootID, data='times')  # create a TIMES node
    test_tree.addRight(parentID=rootID, data='if')  # create a IF node
    # get the IDs of both children
    root_left: str = test_tree.getLeft(rootID).ID  # TIMES
    root_right: str = test_tree.getRight(rootID).ID  # IF

    # * Root -> Left is TIMES so create two children * #
    test_tree.addLeft(parentID=root_left, data='add')  # create a ADD node
    test_tree.addRight(parentID=root_left, data=41)  # create a TERMINAL node
    # get the IDs of both children
    root_left_left: str = test_tree.getLeft(root_left).ID  # ADD
    root_left_right: str = test_tree.getRight(root_left).ID  # TERMINAL

    # * Root -> Left -> Left is ADD so create two children * #
    test_tree.addLeft(parentID=root_left_left, data=75)  # create a TERMINAL node
    test_tree.addRight(parentID=root_left_left, data=76)  # create a TERMINAL node
    # get the IDs of both children
    root_left_left_left: str = test_tree.getLeft(root_left_left).ID  # TERMINAL
    root_left_left_right: str = test_tree.getRight(root_left_left).ID  # TERMINAL

    # * Root -> Right is IF so create three children * #
    test_tree.addLeft(parentID=root_right, data=16)  # create a TERMINAL node
    test_tree.addMiddle(parentID=root_right, data=20)  # create a TERMINAL node
    test_tree.addRight(parentID=root_right, data='subtract')  # create a SUBTRACT node
    # get the IDs of all three children
    root_right_left: str = test_tree.getLeft(root_right).ID  # TERMINAL
    root_right_middle: str = test_tree.getMiddle(root_right).ID  # TERMINAL
    root_right_right: str = test_tree.getRight(root_right).ID  # SUBTRACT

    # * Root -> Right -> Right is SUBTRACT so create two children * #
    test_tree.addLeft(parentID=root_right_right, data=30)  # create a TERMINAL node
    test_tree.addRight(parentID=root_right_right, data=10)  # create a TERMINAL node
    # get the IDs of both children
    root_right_right_left: str = test_tree.getLeft(root_right_right).ID  # TERMINAL
    root_right_right_right: str = test_tree.getRight(root_right_right).ID  # TERMINAL

    # * Create a list of all the Terminal Node IDS (these are the tree's leaves) * #
    # root_right_right is deliberately absent: it is the SUBTRACT operator, not a leaf
    global TERMINAL_NODES2
    TERMINAL_NODES2 = [
        root_left_right,
        root_left_left_left, root_left_left_right,
        root_right_left, root_right_middle,
        root_right_right_left, root_right_right_right,
    ]

    print('Tree 2 Created:')
    print_init(test_tree)  # print the constructed tree

    return test_tree