def configure_tree_topology(self, root, degree=2, remove=False):
    """Configure the cluster's network topology as a tree.

    The tree consists of the specified root node and the remaining nodes,
    which form the subtrees. Children are assigned incrementally, i.e. in
    the order in which the nodes appear in ``self.topology.nodes``.

    Arguments:
        root {integer} -- The tree's root node.

    Keyword Arguments:
        degree {integer} -- The maximum number of children (default: {2})
        remove {boolean} -- Remove the configuration (default: {False})
    """
    self.logger.info("Configuring tree topology...")
    tree = Tree()
    root_node = self.topology.get_node(root)
    tree.create_node(root_node.name, root_node.node_id)

    # Fill the tree level by level: once the current parent has ``degree``
    # children, advance to the next node id, skipping over the root's id.
    # NOTE(review): this arithmetic assumes node ids are consecutive
    # integers starting at 0 -- confirm against the topology config.
    parent_node = root
    for nodex in self.topology.nodes:
        if nodex.node_id == root_node.node_id:
            continue
        if len(tree.children(parent_node)) >= degree:
            if parent_node == root and root != 0:
                parent_node = 0
            elif parent_node + 1 == root:
                parent_node += 2
            else:
                parent_node += 1
        tree.create_node(nodex.name, nodex.node_id, parent_node)

    self.logger.info("The following tree will be configured:")
    tree.show()

    for nodex in self.topology.nodes:
        self.logger.debug("%s:", nodex.name)
        subtree = tree.subtree(nodex.node_id)
        # The tree is not modified in this phase, so these lookups are the
        # same for every destination and are done once per source node.
        children = tree.children(nodex.node_id)
        parent = tree.parent(nodex.node_id)
        for nodey in self.topology.nodes:
            if nodex.node_id == nodey.node_id:
                continue
            if subtree.contains(nodey.node_id):
                # Destination is below us: forward via the child whose
                # subtree contains it.
                for child in children:
                    if (child.identifier == nodey.node_id
                            or tree.is_ancestor(child.identifier,
                                                nodey.node_id)):
                        nodex.add_forwarding(
                            nodey, self.topology.get_node(child.identifier))
                        break
            elif parent is not None:
                # Destination is elsewhere in the tree: forward upwards.
                nodex.add_forwarding(
                    nodey, self.topology.get_node(parent.identifier))

    if not self.testing:
        self.topology.send_forwarding_tables(remove)
def fit(self, X):
    '''Learn the n-gram distribution from the given dataset.

    Builds a prefix tree rooted at ``'$'``. For every symbol of every
    sequence, each of the currently tracked context nodes is extended by
    that symbol: an existing child has its ``count`` incremented, otherwise
    a new child is created with ``count=1``. The result is stored in
    ``self.tree``.

    Parameters
    ----------
    X: list of sequences
        The training set.
    '''
    tree = Tree()
    root = tree.create_node('$', data=self.Payload(count=0, freq=0))

    for seq in X:
        # Sliding window of context nodes; at most ``self.n + 1`` of them.
        ptrs = deque()
        for symbol in seq:
            ptrs.append(root)
            if len(ptrs) > self.n + 1:
                ptrs.popleft()
            for i, p in enumerate(ptrs):
                matches = [
                    c for c in tree.children(p.identifier)
                    if c.tag == symbol
                ]
                if len(matches) == 1:
                    child = matches[0]
                    child.data.count += 1
                else:
                    # No child carries this symbol yet, so create one.
                    child = tree.create_node(tag=symbol,
                                             parent=p.identifier,
                                             data=self.Payload(count=1,
                                                               freq=0))
                ptrs[i] = child

    self.tree = tree
def endpoint_cal(swc_p, unit, sep=","):
    """Generate a multi-branch tree from an swc file.

    The rows returned by ``coords_get`` are inserted into a tree using the
    row id as both tag and identifier, the coordinate as node data and the
    ``pars`` entry as the parent. The first row becomes the root.

    Args:
        swc_p: Path of the swc file (passed through to ``coords_get``).
        unit: Unit argument passed through to ``coords_get``.
        sep: Field separator passed through to ``coords_get``.

    Returns:
        Tuple ``(endpoint_coords, branch_coords, coords, ftree)``:
        coordinates of all leaves followed by the root coordinate,
        coordinates of nodes with at least two children, all coordinates
        (each as a numpy array) and the tree itself.

    Exits the process with status 1 when no coordinates were read.
    """
    print(unit, sep)
    coords, labels, ids, pars = coords_get(swc_p, unit, sep)
    if len(coords) == 0:
        print("{} is something wrong".format(swc_p))
        # A failed read must not be reported to the shell as success.
        sys.exit(1)

    ftree = Tree()
    ftree.create_node(ids[0], ids[0], data=coords[0])
    for coord_, id_, par_ in zip(coords[1:], ids[1:], pars[1:]):
        ftree.create_node(id_, id_, data=coord_, parent=par_)

    endpoint_coords = [x.data for x in ftree.leaves()]
    endpoint_coords.append(coords[0])
    # Children are looked up by identifier, which is what the tree API
    # expects; tag and identifier happen to be equal for these nodes.
    branch_coords = [
        x.data for x in ftree.all_nodes()
        if len(ftree.children(x.identifier)) >= 2
    ]

    endpoint_coords = np.array(endpoint_coords)
    branch_coords = np.array(branch_coords)
    coords = np.array(coords)
    return endpoint_coords, branch_coords, coords, ftree
def is_projective_tree(self, parse_tree: Tree):
    """Return True when no node of ``parse_tree`` has more than two children.

    Prints ``'here'`` when at least one node violates that limit.
    """
    violations = []
    for node_id in parse_tree.expand_tree():
        fan_out = len(parse_tree.children(node_id))
        violations.append(fan_out > 2)

    has_violation = any(violations)
    if has_violation:
        print('here')
    return not has_violation
def compare_actual_folder_with_tree(self, root: path, tree: Tree):
    """Assert that every node of ``tree`` exists as a path below ``root``.

    The tree's root identifier is joined onto ``root`` and checked for
    existence; each child subtree is then checked recursively relative to
    that path.
    """
    top_name = tree.root
    here = root.joinpath(top_name)
    print(here)

    found = here.exists()
    self.assertTrue(
        found, "The path {} should exist, but doesn't".format(here))

    for child in tree.children(top_name):
        self.compare_actual_folder_with_tree(
            here, tree.subtree(child.identifier))
def tree():
    """Build a three-level category tree and collect its header tags.

    The root has identifier -1, the first level uses identifiers 0..9 and
    every first-level node receives five children numbered consecutively
    from 10. Tags are ``(identifier, label)`` tuples.
    """
    # Create the category labels
    cat_tree = Tree()
    cat_tree.create_node((-1, "root"), -1)

    for top in range(10):
        cat_tree.create_node((top, str(top)), top, parent=-1)

    for top in range(10):
        first = 10 + 5 * top
        for cur in range(first, first + 5):
            cat_tree.create_node((cur, str(cur)), cur, parent=top)

    header1_list = [node.tag for node in cat_tree.children(-1)]
    header2_list = [
        [node.tag for node in cat_tree.children(cat1[0])]
        for cat1 in header1_list
    ]
def render_org_tree(tree: Tree, node: Node, payload='') -> str:
    """Render ``node`` and its descendants as text appended to ``payload``.

    A node whose parent identifier is ``'vocabulary'`` (case-insensitive)
    is rendered through ``render_vocabulary_word``; any other node becomes
    a heading line of ``depth + 1`` asterisks followed by its tag. Children
    are rendered recursively and appended in order.
    """
    parent = tree.parent(node.identifier)
    is_vocabulary_entry = bool(parent) and \
        parent.identifier.lower() == 'vocabulary'

    if is_vocabulary_entry:
        payload += render_vocabulary_word(tree, node)
    else:
        stars = '*' * (tree.depth(node) + 1)
        payload += (stars + ' ' + node.tag + '\n')

    for child in tree.children(node.identifier):
        rendered_child = render_org_tree(tree, tree[child.identifier])
        payload += rendered_child
    return payload
def find_nodes_that_contains_more_than_three_children(data):
    """Return the ids of all non-root nodes having three or more children.

    The tree is produced by ``build_tree`` from ``data`` below a synthetic
    node whose identifier is ``"root"``; that node is never reported.
    """
    base = Tree()
    base_root = base.create_node("root", "root")
    built = build_tree(data=data, tree=base, parent=base_root)

    return {
        node_id
        for node_id in built.nodes
        if node_id != "root" and len(built.children(node_id)) >= 3
    }
def trim_excess_root(tree: Tree) -> Tree:
    """Strip leading single-child roots from ``tree``.

    While the root has exactly one child, that child is promoted to be the
    new root, e.g. A -> B -> (C, D) becomes B -> (C, D). The first tree
    whose root does not have exactly one child is returned.
    """
    current = tree
    while True:
        branches = current.children(current.root)
        if len(branches) != 1:
            return current
        only_child_id = branches[0].identifier
        current.update_node(only_child_id, parent=None, bpointer=None)
        current = current.subtree(only_child_id)
def map_tree_to_program(self, tree: Tree) -> str:
    """Fold a binary tree bottom-up into a program and return its value.

    Leaves are mapped with ``self._node_to_type``. An inner node is
    combined once both of its children are known: if only one child has a
    sub-program that one is propagated, if neither has one the node's own
    type is used, otherwise the first child is applied to the second (with
    the reversed application as a fallback). Results are stored in
    ``self._node_to_subprog`` keyed by span.
    """
    self._node_to_subprog = {}
    pending = []  # inner nodes that still have to be combined

    for leaf in tree.leaves():
        leaf_span = leaf.data.span
        self._node_to_subprog[leaf_span] = self._node_to_type(leaf)
        leaf_parent = tree.parent(leaf.identifier)
        if leaf_parent and leaf_parent not in pending:
            pending.append(leaf_parent)

    while pending:
        node = pending.pop()
        kids = tree.children(node.identifier)
        assert len(kids) == 2

        # Not all children resolved yet: put the node back in the queue.
        if any(kid.data.span not in self._node_to_subprog for kid in kids):
            pending.insert(0, node)
            continue

        first = self._node_to_subprog[kids[0].data.span]
        second = self._node_to_subprog[kids[1].data.span]
        try:
            has_first = bool(first)
            has_second = bool(second)
            if has_first and has_second:
                # make sure the second child's value can be formed
                assert second.is_full()
                combined = first.apply(second)
            elif has_first:
                combined = first  # second child carries no label
            elif has_second:
                combined = second  # first child carries no label
            else:
                # neither child carries a label: fall back to the node
                combined = self._node_to_type(node)
            self._node_to_subprog[node.data.span] = combined
        except Exception as e:
            try:
                self._node_to_subprog[node.data.span] = second.apply(first)
            except Exception as e:
                raise Exception('final apply_exception: {}'.format(e))

        upper = tree.parent(node.identifier)
        if upper and upper not in pending:
            pending.insert(0, upper)

    root_span = tree.get_node(tree.root).data.span
    inner_program = self._node_to_subprog[root_span].get_value()
    return inner_program
def __init__(self, holes=0):
    """Fill a 3x3x3x3 integer grid by randomised backtracking, then blank cells.

    Cells are visited in the order given by ``direct_product``. For each
    cell a value is drawn at random from the candidates returned by
    ``pool`` that have not been tried yet from the current state. Every
    accepted assignment is recorded as a node of a search tree so that the
    search can step back when a cell has no candidates left.

    Args:
        holes: Number of distinct cells set to 0 after the grid is
            complete. The complete grid is kept in ``self._answer`` and
            the indices of the blanked cells in ``self.holes``.
    """
    self.data = np.zeros((3, 3, 3, 3), dtype='int')
    element = range(3)
    # presumably every 4-tuple of indices in range(3) -- TODO confirm
    # against direct_product, which is defined elsewhere.
    order = direct_product(element, element, element, element)
    i = 0
    genTree = Tree()
    # Each search node stores [cell index, snapshot of the grid]; the first
    # constructor argument (read back below as ``.tag``) is the step count.
    root = Node(i, 'root', data=[order[0], self.data.copy()])
    genTree.add_node(root)
    currentNode = root
    # Value that a search node wrote into its own cell.
    getData = lambda node: node.data[1][tuple(node.data[0])]
    while i < len(order):
        i += 1
        a, b, c, d = order[i - 1]
        # Candidates for this cell minus the values already tried as
        # children of the current search node.
        numPool = pool(self.data, a, b, c, d) - set(
            map(getData, genTree.children(currentNode.identifier)))
        if numPool:
            self.data[a, b, c, d] = np.random.choice(list(numPool))
            node = Node(i, data=[order[i - 1], self.data.copy()])
            genTree.add_node(node, currentNode)
            currentNode = node
        else:
            # Dead end: climb while the ancestor has as many children as
            # its own cell had candidates, i.e. it is exhausted as well.
            # NOTE(review): if the climb reaches the root, ``prev`` is
            # presumably None and ``prev.identifier`` would fail -- confirm.
            prev = genTree.parent(currentNode.identifier)
            while len(genTree.children(prev.identifier)) == len(
                    pool(prev.data[1], *(prev.data[0]))):
                currentNode = prev
                prev = genTree.parent(currentNode.identifier)
            else:
                # The loop has no ``break``, so this always runs once the
                # condition turns false.
                currentNode = prev
            # Restore the grid and step counter of the node we resume from.
            self.data = currentNode.data[1].copy()
            i = currentNode.tag
            continue
    # Choose ``holes`` distinct cell positions to blank out.
    h = np.random.choice(len(order), size=holes, replace=False)
    self._answer = self.data.copy()
    self.holes = np.array(order)[h]
    self.data[tuple(self.holes.T.tolist())] = 0
def collapse(t1: tl.Tree, t2: tl.Tree) -> tl.Tree:
    """Merge ``t2`` into ``t1`` at the root and return the merged copy.

    Both inputs are deep-copied and passed through ``reset_ids`` first, so
    the arguments themselves are left untouched. Every subtree hanging off
    the root of ``t2`` is then pasted below the root of ``t1``.
    """
    # Operate on private copies only.
    merged = tl.Tree(tree=t1, deep=True)
    donor = tl.Tree(tree=t2, deep=True)

    # Give both copies fresh identifiers.
    merged = reset_ids(merged)
    donor = reset_ids(donor)

    # Hang each top-level branch of the donor under the merged root.
    for branch in donor.children(donor.root):
        branch_tree = donor.subtree(branch.identifier)
        merged.paste(merged.root, branch_tree)
    return merged
def get_metadata(node: Node, tree: Tree, *,
                 include_count=True, include_attrs=False,
                 include_depth=True) -> Dict:
    """Return a dict describing ``node``.

    The result always contains ``'id'`` and ``'tag'``. ``'childrenCount'``
    and ``'depth'`` are added when the matching flag is set, and the
    entries returned by ``get_attributes`` are merged in when
    ``include_attrs`` is true.
    """
    node_id = node.identifier
    data = {'id': node_id, 'tag': node.tag}

    if include_count:
        kids = tree.children(node_id)
        data['childrenCount'] = len(kids) if kids else 0

    if include_depth:
        data['depth'] = tree.depth(node)

    if include_attrs:
        data.update(get_attributes(node))

    return data
def get_intersection_tree(T1, T2):
    """Return a copy of ``T1`` keeping only the nodes that match ``T2``.

    A node of ``T1`` is kept when some node of ``T2`` has exactly the same
    set of leaf ids below it (as reported by
    ``get_leaf_node_ids_for_node``). Every other node is contracted: its
    children are moved to its parent and the node itself is removed.

    Args:
        T1: Tree that is copied and pruned; it is not modified.
        T2: Tree providing the reference leaf sets; it is not modified.

    Returns:
        The pruned deep copy of ``T1``.
    """
    T = Tree(tree=T1, deep=True)
    T1_bfs = list(T1.expand_tree(mode=1))

    # T2 never changes, so its leaf sets are computed once instead of once
    # per T1 node. Frozensets make the exact-match test a set lookup.
    t2_leaf_sets = {
        frozenset(get_leaf_node_ids_for_node(T2, i))
        for i in T2.expand_tree(mode=1)
    }

    for nid in T1_bfs:
        # Leaf sets of T are recomputed because earlier contractions in
        # this loop change the copy.
        X = frozenset(get_leaf_node_ids_for_node(T, nid))
        if X not in t2_leaf_sets:
            # NOTE(review): for the root ``T.parent(nid)`` has no parent to
            # return, so a non-matching root fails here -- confirm that
            # callers always pass trees over the same leaf set.
            par = T.parent(nid).identifier
            for c in T.children(nid):
                T.move_node(c.identifier, par)
            T.remove_subtree(nid)
    return T
def _build_tree(self, scores: ndarray, bin_edges: ndarray) -> Tree:
    """Build a tree whose leaves hold the ids of the instances in each bin.

    A full tree with ``self.n_children[level]`` children per node is built
    for ``self.depth`` levels. Each score is assigned a bin with
    ``np.digitize``; the k-th leaf receives the indices of the scores that
    fall into bin ``k``. Leaves without instances are removed, and nodes
    with exactly one child are linked past.

    Args:
        scores: Values to be binned.
        bin_edges: Bin edges passed to ``np.digitize``.

    Returns:
        The pruned and simplified tree.
    """
    # Build tree with specified number of children at each level
    tree = Tree()
    tree.add_node(Node())  # root node
    nodes_prev = [tree.get_node(tree.root)]
    for level in range(self.depth):
        nodes_current = []
        for node in nodes_prev:
            children = []
            for _ in range(self.n_children[level]):
                child = Node()
                tree.add_node(child, parent=node)
                children.append(child)
            nodes_current.extend(children)
        nodes_prev = nodes_current

    # Bin number of every score, shifted down by one so that it can be
    # compared with the leaf position ``k`` below.
    assignments = np.digitize(scores, bin_edges) - 1

    # Store instance ids in leaves
    leaves = tree.leaves()
    for k, node in enumerate(leaves):
        instance_ids = np.where(assignments == k)[0]
        if instance_ids.size == 0:
            # No instance fell into this bin: drop the leaf.
            tree.remove_node(node.identifier)
        else:
            node.data = instance_ids

    # Prune empty leaves; removing a leaf can turn its parent into a new
    # empty leaf, so repeat until a pass removes nothing.
    check_for_empty_leaves = True
    while check_for_empty_leaves:
        check_for_empty_leaves = False
        leaves = tree.leaves()
        for node in leaves:
            if node.data is None and len(node.successors(
                    tree.identifier)) == 0:
                # Node carries no data and has no successors in this tree
                tree.remove_node(node.identifier)
                check_for_empty_leaves = True

    # Simplify tree: remove nodes that only have one child
    # NOTE(review): the tree is modified while expand_tree is still being
    # iterated, and the root may also have a single child -- confirm that
    # treelib tolerates both cases.
    for nid in tree.expand_tree(mode=tree.WIDTH):
        children = tree.children(nid)
        if len(children) == 1:
            tree.link_past_node(nid)

    return tree
def create_dummy_download_folder(root: path, tree: Tree) -> path:
    """Create the files and folders described by ``tree`` below ``root``.

    The tree's root identifier is joined onto ``root``. If that path does
    not exist it is created: as an empty file when the name ends with
    ``.mp3``, otherwise as a directory. Child subtrees are created
    recursively inside it.

    Returns:
        The path that corresponds to the root of ``tree``.
    """
    top_name = tree.root
    target = root.joinpath(top_name)

    if not target.exists():
        print("Creating {}".format(target))
        make = target.touch if top_name.endswith(".mp3") else target.mkdir
        make()
        # pause briefly so that created entries do not share a ctime
        time.sleep(0.01)

    for child in tree.children(top_name):
        create_dummy_download_folder(target, tree.subtree(child.identifier))
    return target
def map_tree_to_program(self, tree: Tree) -> str:
    """Fold a tree bottom-up into a program string ``'answer ( ... )'``.

    Leaves are mapped with ``self._node_to_type``. Inner nodes must have
    two or three children and are combined with ``self.merge_children``
    once all children are known. For three children, ordered by the start
    of their span, the outer two are merged first and the middle one is
    then merged with that intermediate result. Results are stored in
    ``self._node_to_subprog`` keyed by span.
    """
    self._node_to_subprog = {}
    pending = []  # inner nodes that still have to be combined

    for leaf in tree.leaves():
        leaf_span = leaf.data.span
        self._node_to_subprog[leaf_span] = self._node_to_type(leaf)
        leaf_parent = tree.parent(leaf.identifier)
        if leaf_parent and leaf_parent not in pending:
            pending.append(leaf_parent)

    while pending:
        node = pending.pop()
        kids = tree.children(node.identifier)
        assert len(kids) in [2, 3]

        # Not all children resolved yet: put the node back in the queue.
        if any(kid.data.span not in self._node_to_subprog for kid in kids):
            pending.insert(0, node)
            continue

        if len(kids) == 2:
            first = self._node_to_subprog[kids[0].data.span]
            second = self._node_to_subprog[kids[1].data.span]
            merged = self.merge_children(first, second, node)
        else:
            ordered = sorted(kids, key=lambda c: c.data.span[0])
            first = self._node_to_subprog[ordered[0].data.span]
            middle = self._node_to_subprog[ordered[1].data.span]
            last = self._node_to_subprog[ordered[2].data.span]
            outer = self.merge_children(first, last, node)
            merged = self.merge_children(middle, outer, node)
        self._node_to_subprog[node.data.span] = merged

        upper = tree.parent(node.identifier)
        if upper and upper not in pending:
            pending.insert(0, upper)

    root_span = tree.get_node(tree.root).data.span
    inner_program = self._node_to_subprog[root_span].get_value()
    return 'answer ( {} )'.format(inner_program)
def get_service_tree():
    """Return the service hierarchy as a list of nested dicts.

    Rows are read through ``service.service()`` and
    ``result_to_dic.to_dic`` and inserted into a tree below a synthetic
    ``'root'`` node. Each entry of the returned list describes one
    top-level service as ``{'id': code, 'label': name}``, with a
    ``'children'`` list of the same shape when the service has children.

    NOTE(review): a row can only be inserted after its parent row, so the
    query is presumably ordered parents-first -- confirm.
    """
    s = service.service()
    try:
        column = [
            'code', 'name', 'location_code', 'create_time', 'parent_code',
            'parent_name', 'address'
        ]
        rows = result_to_dic.to_dic(s, column)

        tree = Tree()
        tree.create_node('root', 'root')
        for x in rows:
            code = str(x['code'])
            # Identifiers are stored as strings, so the parent reference
            # has to be converted the same way or the lookup fails for
            # non-string codes.
            parent = str(x['parent_code']) if x['parent_code'] else 'root'
            tree.create_node(code, code, parent=parent, data=x['name'])

        def transfer(code):
            """Convert the subtree rooted at ``code`` into a nested dict."""
            struct = {'id': code, 'label': tree.nodes[code].data}
            children = tree.children(code)
            if children:
                struct['children'] = [
                    transfer(node.identifier) for node in children
                ]
            return struct

        return [transfer(x.identifier) for x in tree.children('root')]
    finally:
        # Close the connection even when building the tree fails.
        service.db_close()
from treelib import Tree

# Demo: a node whose value is 0 and which has children receives the sum of
# its children's values, computed recursively.
tree = Tree()
for name, value, parent in (
        ("a", 0, None),
        ("b", 7, "a"),
        ("c", 4, "b"),
        ("d", 3, "b"),
        ("f", 0, "b"),
        ("e", 3, "a"),
):
    tree.create_node(name, name, data={"v": value}, parent=parent)
print(tree)


def func_1(node):
    """Return the sum of the children's values of ``node``.

    A child whose value is 0 and which has children of its own is first
    assigned its own sum.
    """
    total = 0
    for child in tree.children(node.identifier):
        if child.data["v"] == 0 and tree.children(child.identifier):
            child.data["v"] = func_1(child)
        total += child.data["v"]
    return total


for node in tree.all_nodes():
    if node.data["v"] == 0 and tree.children(node.identifier):
        node.data["v"] = func_1(node)

for node in tree.all_nodes():
    print("node: " + node.tag + " value: {}".format(node.data["v"]))
class verifier(object):
    '''
    Reconstruct the original formula from an Enfragmo instance file.

    Each subformula which is an atom is labelled by the leader of its
    SameAtom equivalence class, and nesting of formulas is indicated by
    bracketing. The symbols used to represent operators are as follows:

        And            &
        Or             v
        Not            ~
        Implication    ->
        Biconditional  <->
        Box            box
        Diamond        dia

    The format of an instance file is assumed to be as follows:

        TYPE Subformula [ 1.. n]
        TYPE World [1..m]
        PREDICATE Atom
        ...
        PREDICATE And
        ...
        PREDICATE Or
        ...
        PREDICATE Not
        ...
        PREDICATE Implication
        ...
        PREDICATE Box
        ...
        PREDICATE Diamond
        ...
        PREDICATE SameAtom

    Where "..." indicates that either singletons, pairs, or triples occupy
    the lines between the current PREDICATE delimiter and the next,
    signifying the relationship between the subformulas. For example,

        PREDICATE And
        (1,2,3)

    indicates that the main connective of subformula 1 is conjunction, and
    that subformulas 2 and 3 are the two operands of that operator. The
    lookups in this class match tuples written without spaces.
    '''

    def __init__(self, filename):
        '''
        Receives the name of the instance file to be verified and prepares
        the union-find structure used for the SameAtom classes.
        '''
        self.SameAtomList = unionfind.UnionFind()
        self.filename = filename

    @staticmethod
    def _rangeUpperBound(typeLine):
        '''
        Return the integer written directly before the last "]" of a TYPE
        line, e.g. 12 for "TYPE World [1..12]".
        '''
        head = typeLine.rsplit("]", 1)[0].rstrip()
        digits = head[len(head.rstrip("0123456789")):]
        return int(digits)

    def readProblemInstanceFile(self):
        '''
        Read the instance file into self.instanceFileLines, stripping each
        line and skipping lines that consist of a newline only.

        This method assumes that the instance file exists and is correctly
        formatted. Subformulas are labeled in pre-order DFS traversal
        fashion, so as to allow the numbering to reflect the
        operator/operand relationship.
        '''
        # The context manager closes the file handle deterministically.
        with open(self.filename) as instanceFile:
            self.instanceFileLines = [line.strip() for line in instanceFile
                                      if line != '\n']

    def parseProblemInstanceFile(self):
        '''
        Count the subformulas, register the SameAtom pairs and build the
        syntax tree from the lines read by readProblemInstanceFile.
        '''
        self.countNumTreeNodes()
        self.setUpSameAtomList()
        self.buildTree()

    def numWorlds(self):
        '''
        Return the number of worlds, i.e. the upper bound given on the
        second line of the instance file. Multi-digit bounds are supported.
        '''
        return self._rangeUpperBound(self.instanceFileLines[1])

    def countNumTreeNodes(self):
        '''
        The number of tree nodes is inherent in the number of subformulas,
        which is given on the first line of a well-formed problem instance
        file. The given formula is considered to be the first subformula,
        and its main connective labels the root node.
        '''
        self.numTreeNodes = self._rangeUpperBound(self.instanceFileLines[0])

    def countNumTreeLeaves(self):
        '''
        The number of tree leaves is simply the number of singletons that
        satisfy Atom, including duplicates.
        '''
        self.numTreeLeaves = \
            int((self.instanceFileLines.index("PREDICATE SameAtom") - 1) -
                self.instanceFileLines.index("PREDICATE Atom"))

    def countNumAtoms(self):
        '''
        Since multiple subformulas can refer to the same atom, the count of
        pairs satisfying SameAtom is subtracted from the total count of
        subformulas satisfying Atom. Both counts are taken as the number of
        entries between the appropriate PREDICATE identifiers (given a
        well-formed instance file).
        '''
        lines = self.instanceFileLines
        numAtomEntries = lines.index("PREDICATE And") - \
            (lines.index("PREDICATE Atom") + 1)
        numSameAtomPairs = len(lines) - \
            (lines.index("PREDICATE SameAtom") + 1)
        self.numAtoms = int(numAtomEntries - numSameAtomPairs)
        return self.numAtoms

    def assignSymbol(self, label):
        '''
        Return the symbol used for the operator predicate `label`, or None
        when the label is not an operator.
        '''
        symbols = {
            "And": "&",
            "Or": "v",
            "Not": "~",
            "Implication": "->",
            "Biconditional": "<->",
            "Box": "box",
            "Diamond": "dia",
        }
        return symbols.get(label)

    def assignAtom(self, i):
        '''
        Return the leader of the SameAtom class containing subformula i,
        registering i as a new singleton class first when it is unknown.
        '''
        key = str(i)
        for atomEquivClass in self.SameAtomList.get_sets():
            if key in atomEquivClass:
                return self.SameAtomList.get_leader(key)
        self.SameAtomList.insert(key)
        return self.SameAtomList.get_leader(key)

    def setUpSameAtomList(self):
        '''
        Using the Union Find datastructure, keep track of the equivalence
        classes of SameAtoms. Every pair listed below "PREDICATE SameAtom"
        is inserted; labels are later derived from the class leader.
        '''
        startIndexSameAtoms = findInFile(
            self.instanceFileLines,
            lambda x: "PREDICATE SameAtom" in x) + 1
        sameAtomPairs = self.instanceFileLines[startIndexSameAtoms:]
        for pair in sameAtomPairs:
            tmp = pair.split(",")
            # strip() keeps the labels comparable with str(i) even if a
            # pair was written with spaces.
            label1 = tmp[0].split("(")[1].strip()
            label2 = tmp[1].split(")")[0].strip()
            self.SameAtomList.insert(label1, label2)

    def determineConnective(self, i):
        '''
        Each subformula label is guaranteed to be the first argument of
        some tuple; either it will correspond with an atom, the only
        argument, or it will correspond with the main connective of a unary
        (i.e. negation, box, or diamond) or binary (i.e. conjunction or
        disjunction) subformula.

        Returns "false" for Falsum, the atom label for atoms, the operator
        symbol for operators, and None when i is not found.
        '''
        SiThing = findInFile(self.instanceFileLines,
                             lambda x: "(" + str(i) + ")" in x)
        if SiThing:
            # go back in the file until the enclosing predicate is found
            for k in range(SiThing, 0, -1):
                header = self.instanceFileLines[k].split(" ")
                if header[0] == "PREDICATE":
                    if header[1] == "Falsum":
                        return "false"
                    return self.assignAtom(i)

        SiAsMainConnective = findInFile(self.instanceFileLines,
                                        lambda x: "(" + str(i) + "," in x)
        if SiAsMainConnective:
            # go back in the file until the enclosing predicate is found
            for j in range(SiAsMainConnective, 0, -1):
                header = self.instanceFileLines[j].split(" ")
                if header[0] == "PREDICATE":
                    if header[1] == "SameAtom":
                        # i only occurs as first element of a SameAtom
                        # pair, so it is an atom
                        return self.assignAtom(i)
                    # otherwise the predicate names the operator
                    return self.assignSymbol(header[1])

    def nodeCreation(self, predicate, SiConnective, i):
        '''
        Create tree node i below the subformula that lists it as operand.
        `predicate` selects the tuple line in which i occurs as an operand;
        the first element of that tuple is the parent.
        '''
        SiAsOperand = findInFile(self.instanceFileLines, predicate)
        ParentOfSi = str(
            self.instanceFileLines[SiAsOperand].split(",")[0].split("(")[1])
        self.syntaxTree.create_node(SiConnective, str(i), parent=ParentOfSi)

    def makeSyntaxTreeNode(self, SiConnective, i):
        '''
        Insert subformula i into the syntax tree: below its parent when it
        occurs as second or third tuple element, otherwise as a node
        without parent.
        '''
        asMiddleOperand = lambda x: "," + str(i) + "," in x
        asLastOperand = lambda x: "," + str(i) + ")" in x
        if findInFile(self.instanceFileLines, asMiddleOperand):
            self.nodeCreation(asMiddleOperand, SiConnective, i)
        elif findInFile(self.instanceFileLines, asLastOperand):
            self.nodeCreation(asLastOperand, SiConnective, i)
        else:
            self.syntaxTree.create_node(SiConnective, str(i))

    def buildTree(self):
        '''
        To build the syntax tree for the formula as laid out in the
        instance file, the main connective of each subformula (starting
        with the main connective of the formula itself) is stripped off and
        a tree node is labelled with the corresponding symbol. Note that
        each subformula appears exactly once as the first argument of a
        tuple, and can appear at most once as a second (or third, for
        binary operators) argument in a tuple.
        '''
        self.syntaxTree = Tree()
        for i in range(1, self.numTreeNodes + 1):
            SiConnective = self.determineConnective(i)
            self.makeSyntaxTreeNode(SiConnective, i)

    def myShowTree(self, tree, root):
        '''
        In-order depth-first traversal of the syntax tree using recursion.
        A binary node is printed as "( left tag right )", a unary node as
        "tag ( child )" and a leaf as its tag alone.

        Note that the traversal always reads self.syntaxTree; the `tree`
        argument is accepted for compatibility but not consulted.
        '''
        rootNID = root.identifier
        children = self.syntaxTree.children(rootNID)
        if len(children) == 2:
            print("(", end=" ")
            self.myShowTree(self.syntaxTree, children[0])
        print(self.syntaxTree.get_node(rootNID).tag, end=" ")
        if len(children) >= 1:
            if len(children) == 1:
                print("(", end=" ")
                self.myShowTree(self.syntaxTree, children[0])
            else:
                self.myShowTree(self.syntaxTree, children[1])
            print(")", end=" ")
class ParentChildEvaluate:
    """
    Class to perform intrinsic evaluation of embeddings using the
    hierarchical relation of parent/child domains

    1) parse ParentChildTreeFile.txt from interpro
    2) for each child of root
        nn = ask embeddings model to give M nearest neighbors
        calculate_precision_atM(child.descendants, nn)
        calculate_recall_atN(child.descendants, nn)
    3) plot histogram of precision and recall

    #Credits: https://medium.com/@m_n_malaeb/recall-and-precision-at-k-for-recommender-systems-618483226c54
    """

    def __init__(self, data_path):
        """
        ParentChildEvaluate class init

        Parameters
        ----------
        data_path : str
            full data path (output files are written there)

        Returns
        -------
        None
        """
        print("ParentChildEvaluate")
        self.data_path = data_path
        self.tree = Tree()

    def get_model_name(self):
        """
        Get embedding model name

        Returns
        -------
        str
            base name of the model file set by load_emb_model
        """
        return ntpath.basename(self.model_file)

    def load_emb_model(self, model_file, is_model_binary):
        """
        Load embedding model

        Parameters
        ----------
        model_file : str
            model file name
        is_model_binary : bool
            model is saved in binary format (True), otherwise (False)

        Returns
        -------
        None
        """
        self.model_file = model_file
        self.emb_model = KeyedVectors.load_word2vec_format(
            model_file, binary=is_model_binary)

    def parse_parent_child_file(self,
                                parent_child_file_name,
                                out_path,
                                output_file_name,
                                save_parsed_tree=False):
        """
        Parse the parent child file into self.tree

        Every line holds an interpro id, optionally preceded by one "--"
        per nesting level. A node is attached to the closest preceding line
        with a smaller nesting level, or to the synthetic root "INTERPRO"
        when there is none. Blank lines are ignored.

        Parameters
        ----------
        parent_child_file_name : str
            parent child file name
        out_path : str
            output data path
        output_file_name : str
            output file name
        save_parsed_tree : bool
            after parsing save parsed tree (True), otherwise (False)

        Returns
        -------
        None
        """
        self.tree.create_node("INTERPRO", "INTERPRO")
        # Stack of (nesting level, interpro id) for the chain of ancestors
        # that are still open. Using a stack lets the parser climb any
        # number of levels at once, not just a single one.
        open_ancestors = []
        with open(parent_child_file_name, 'r') as parent_child_file:
            for line in parent_child_file:
                line = line.strip()
                if not line:
                    continue
                current_num_minus_signs = line[0:line.find("IPR")].count(
                    "--")
                interpro_id = line.lstrip("-").split("::")[0]
                assert interpro_id[
                    0:
                    3] == "IPR", "AssertionError: {} \n interpro id should start with IPR and has length of 9.".format(
                        interpro_id)

                # Close every ancestor that is not above the current line.
                while (open_ancestors and
                       open_ancestors[-1][0] >= current_num_minus_signs):
                    open_ancestors.pop()
                if open_ancestors:
                    current_parent = open_ancestors[-1][1]
                else:
                    current_parent = "INTERPRO"

                self.tree.create_node(interpro_id,
                                      interpro_id,
                                      parent=current_parent)
                open_ancestors.append((current_num_minus_signs, interpro_id))

        if save_parsed_tree:
            self.tree.save2file(
                filename=os.path.join(out_path, output_file_name))

    def get_nn_calculate_precision_recall_atN(self, N, plot_histograms,
                                              save_diagnostics):
        """
        Get nearest domain vector for each domains and calculate recall based on the ground truth (parsed tree)

        Only the direct children of the root are evaluated; the ground
        truth of each is the set of all its descendants. Domains missing
        from the embedding model are skipped.

        Parameters
        ----------
        N : int
            number of nearest domain vector,
            if N==100 then retrieve as many as the children of a domain in the parsed tree
        plot_histograms : bool
            plot histograms for performance metrics (True), otherwise (False)
        save_diagnostics : bool
            save diagnostic plots for domain with low recall

        Returns
        -------
        None
        """
        print("Get NN and calculate precision and recall at {}".format(N))
        recalls_n = []
        precisions_n = []
        interpros_recall0 = []
        interpros_num_children_recall0 = []
        retrieve_all_children = N == 100

        for interpro_node in self.tree.children("INTERPRO"):
            all_children = self.tree.subtree(
                interpro_node.identifier).all_nodes()
            assert interpro_node in all_children, "AssertionError: parent {} is not in the set of all children.".format(
                interpro_node.identifier)
            all_children.remove(interpro_node)

            # N itself is kept unchanged so that it stays valid for the
            # plot titles below.
            topn = len(all_children) if retrieve_all_children else N

            if interpro_node.identifier not in self.emb_model:
                print("Model does not contain this id.")
                continue
            nearest_neighbor_ids = {
                nn[0] for nn in self.emb_model.most_similar(
                    positive=interpro_node.identifier, topn=topn)
            }

            true_positives = {child.identifier for child in all_children
                              }.intersection(nearest_neighbor_ids)
            assert len(all_children) > 0 and len(
                nearest_neighbor_ids
            ) == topn, "AssertionError: For parent {} all children should be > 0 and nearest neighbors should be equal to N.".format(
                interpro_node.identifier)

            recall_n = len(true_positives) / len(all_children)
            precision_n = len(true_positives) / len(nearest_neighbor_ids)
            assert 0.0 <= recall_n <= 1.0 and 0.0 <= precision_n <= 1.0, "AssertionError: For parent {} recall or precision is not at (0,1]".format(
                interpro_node.identifier)

            recalls_n.append(recall_n)
            precisions_n.append(precision_n)
            if recall_n == 0.0:
                interpros_recall0.append(interpro_node.identifier)
                interpros_num_children_recall0.append(len(all_children))

        if plot_histograms:
            if retrieve_all_children:
                self.plot_histogram(recalls_n, "Recall", "Recall",
                                    "Number of Interpro domains", "recall")
            else:
                self.plot_histogram(recalls_n, "Recall@{}".format(N),
                                    "Recall", "Number of Interpro domains",
                                    "recall_{}".format(N))
            self.plot_histogram(precisions_n, "Precision@{}".format(N),
                                "Precision", "Number of Interpro domains",
                                "precision_{}".format(N))

        # Guard against an empty result (e.g. no domain found in the
        # model), which would otherwise divide by zero.
        if retrieve_all_children and recalls_n:
            avg_recall = sum(recalls_n) / len(recalls_n)
            print("Average recall at 100: {:.3f}".format(avg_recall))

        if save_diagnostics:
            self.save_diagnostics_recall0(interpros_recall0,
                                          interpros_num_children_recall0)

    def save_diagnostics_recall0(self, interpros_recall0,
                                 interpros_num_children_recall0):
        """
        Save diagnostics histogram for domains with recall of 0

        Parameters
        ----------
        interpros_recall0 : list of str
            interpro ids with recall 0
        interpros_num_children_recall0 : list of int
            number of children of each interpro id, found from the parsed tree, with recall 0

        Returns
        -------
        None
        """
        print("Saving diagnostics for intepro domains with recall 0")
        with open(
                os.path.join(
                    self.data_path,
                    self.get_model_name() + "_interpros_recall0" + ".txt"),
                "w") as interpros_recall0_file:
            # write file with names of interpro having recall 0
            interpros_recall0_file.write("\n".join(interpros_recall0))
        # plot histogram of number of children for interpro parents with recall 0
        self.plot_histogram(interpros_num_children_recall0, None,
                            "Number of Intepro domains", "Number of children",
                            "hist")

    def plot_histogram(self, performance_N, title, xlabel, ylabel,
                       out_suffix):
        """
        Plot histogram for performance metric and also for the number of children

        The figure is written to
        ``<data_path>/<model name>_<out_suffix>.png`` and then closed.

        Parameters
        ----------
        performance_N : list of float
            performance metric value per parent domain
        title : str
            histogram title (if not None)
        xlabel : str
            label x
        ylabel : str
            label y
        out_suffix : str
            histogram output file name suffix

        Returns
        -------
        None
        """
        fig = plt.figure()
        plt.hist(performance_N,
                 color='g',
                 align='left',
                 edgecolor='k',
                 alpha=0.8)
        plt.xlabel(xlabel, fontsize=14)
        plt.ylabel(ylabel, fontsize=14)
        if title is not None:
            plt.title(title, fontsize=14)
        plt.xticks(np.arange(0, 1.1, 0.1))
        hist_name = self.get_model_name() + "_" + out_suffix + ".png"
        fig.savefig(os.path.join(self.data_path, hist_name),
                    bbox_inches='tight',
                    dpi=600)
        # Release the figure; otherwise every call leaves one open.
        plt.close(fig)
class Bftree:
    """Binary feature tree grown from the splits that ``utils.get_split`` returns.

    Every node is identified by the feature index chosen for the split and
    stores, in its data dict, the attribute value (``attr``), the split
    score (``score``) and its position below the parent (``child``: 0 for
    the root, 1 for a left child, 2 for a right child).
    """

    def __init__(self,
                 max_depth=2,
                 min_size=30,
                 n_features=500,
                 criterion=None):
        """Store the growth limits and create an empty tree.

        Args:
            max_depth: Depth at which splitting stops.
            min_size: Minimum number of rows a partition needs in order to
                be split further.
            n_features: Passed through to ``utils.get_split``.
            criterion: Passed through to ``utils.get_split``.
        """
        self.max_depth = max_depth
        self.min_size = min_size
        self.n_features = n_features
        self.tree = Tree()
        self.criterion = criterion

    def fit(self, X, y):
        """Fits a Trainingset

        Calculates initial best scoring feature for root node
        Recursively adds child features to root node
        """
        root = utils.get_split(data=X,
                               targets=y,
                               tree=self.tree,
                               criterion=self.criterion,
                               n_features=self.n_features)
        n_tag = "%s=%d (%f)" % (root['index'], root['attr'],
                                round(root['score'], 2))
        n_dict = {"attr": root['attr'], "score": root['score'], "child": 0}
        self.tree.create_node(n_tag, root['index'], data=n_dict, parent=None)
        self.split(root, 1)

    def _grow_child(self, data, targets, parent_index, side, child_code,
                    current_depth):
        """Split one partition and attach the result below ``parent_index``.

        Nothing is added when the partition has fewer than ``min_size``
        rows or when ``utils.get_split`` returns no feature index.
        ``side`` ("L"/"R") only appears in the node tag; ``child_code``
        (1/2) is stored in the node data.
        """
        if data.shape[0] < self.min_size:
            return
        current = utils.get_split(data=data,
                                  targets=targets,
                                  tree=self.tree,
                                  criterion=self.criterion,
                                  n_features=self.n_features)
        # get_split reports "no improving feature" through a None index.
        if current['index'] is None:
            return
        n_tag = ("%s=%d(" + side + ") (%f)") % (current['index'],
                                                 current['attr'],
                                                 round(current['score'], 2))
        n_dict = {
            "attr": current['attr'],
            "score": current['score'],
            "child": child_code
        }
        self.tree.create_node(tag=n_tag,
                              identifier=current['index'],
                              data=n_dict,
                              parent=parent_index)
        self.split(current, current_depth + 1)

    def split(self, res, current_depth):
        """Splits a node into l/r child

        Adds left/right node if split partitions exist.
        Halts when one of the termination criteria is met:
        1) get_split returned no partitions
        2) current_depth >= max_depth
        3) partition (l/r) size < min_size (rows)

        The left branch is grown completely before the right branch is
        evaluated. Always returns the current tree.
        """
        # Check if partition(s) exist.
        if res['groups'] is None:
            return self.tree
        left, right = res['groups'][0]  # L/R feature partitions
        left_y, right_y = res['groups'][1]  # L/R scoring partitions

        # Return tree if max_depth reached.
        if current_depth >= self.max_depth:
            return self.tree

        self._grow_child(left, left_y, res['index'], "L", 1, current_depth)
        self._grow_child(right, right_y, res['index'], "R", 2, current_depth)
        return self.tree

    def getChild(self, feature, site):
        """Helper to return the L/R child of a specific node
        based on a Node's dict.

        L_child: 1
        R_child: 2

        Returns None when the node has no child at that position.
        """
        for ch in self.tree.children(feature):
            # Compare by value: identity of equal integers is not
            # guaranteed.
            if ch.data['child'] == site:
                return ch
        return None

    def predict(self, feature, dataset):
        """Predicts the "Class" of a feature set (Pandas series).

        Starting at node ``feature``, the left child is followed when the
        sample's value equals the node's attribute and the right child
        otherwise. When the required child does not exist the walk stops
        and returns 1 (value matched) or 0 (value did not match).
        """
        current_node = self.tree.get_node(feature)
        if dataset[feature].item() == current_node.data['attr']:
            # Match: continue with the left child (site=1) if present.
            child, leaf_class = self.getChild(feature, 1), 1
        else:
            # No match: continue with the right child (site=2) if present.
            child, leaf_class = self.getChild(feature, 2), 0

        if child is not None:
            return self.predict(child.identifier, dataset)
        return leaf_class
class MCTS_better():
    """Monte-Carlo tree search player that keeps its statistics in a tree.

    The search tree has a head node with identifier 0. Every other node is
    tagged ``(player, move)`` and stores ``data = [plays, wins]``.
    """

    def __init__(self,
                 confidence=CONFIDENCE,
                 time=MAX_CAL_TIME,
                 max_actions=1000):
        """Store the search parameters.

        Arguments:
            confidence -- exploration constant used in the UCB term.
            time -- time budget for one call to ``get_move``; compared
                against ``time.time()`` differences, so it is in seconds.
            max_actions -- maximum number of moves played in one simulation.
        """
        self.max_cal_time = float(time)
        self.max_actions = max_actions
        self.confidence = confidence
        self.max_depth = 0

    def get_move(self, board, player):
        """Run simulations until the time budget is used and pick a move.

        Arguments:
            board -- game board; it is deep-copied for every simulation.
            player -- the chess colour [BLACK or WHITE] to move.

        Returns:
            The chosen move, or None when the board has no empty place.
        """
        self.board = board
        self.player = player
        empty_set, a, b = self.board.get_board_item()
        if len(a) == 0 and len(b) == 0:
            return (MIDDLE, MIDDLE)
        if len(empty_set) == 1:
            return (empty_set[0][0], empty_set[0][1])
        if len(empty_set) == 0:
            print("No place to play")
            return None

        self.MCTS_tree = Tree()
        self.HeadNode = Node('HeadNode', 0)
        self.MCTS_tree.add_node(self.HeadNode)
        self.plays = {}
        self.wins = {}

        simulations = 0
        start = time.time()
        # Keep half a second in reserve for picking the final move.
        while time.time() - start < (self.max_cal_time - 0.5):
            board_for_MCTS = copy.deepcopy(self.board)
            self.run_simulation(board_for_MCTS, self.player)
            simulations += 1
        print("total simuations = ", simulations)

        move = self.select_best_move()
        print("MCTS move:", move[0], move[1])
        return move

    def run_simulation(self, board, player):
        """Play one simulated game on ``board`` and back-propagate the result.

        Starting at the head node, each step either expands a random move
        that has no statistics yet or, when every available move has been
        tried, follows the move with the best UCB score. The game stops when
        somebody wins, no move is available any more, or ``max_actions``
        moves were played. Afterwards every node on the visited path gets
        its play count incremented, and its win count too when the winner is
        ``self.player``.

        Arguments:
            board -- private copy of the board; it is modified in place.
            player -- the player who moves first in this simulation.
        """
        tree = self.MCTS_tree
        node = self.HeadNode
        winner = -1

        for _ in range(self.max_actions):
            availables = board.get_k_dist_empty_tuple(2)
            children = tree.children(node.identifier)

            # Statistics of the current node's children; these are also read
            # by select_noused_node() through self.plays.
            self.plays = {n.tag: n.data[0] for n in children}
            self.wins = {n.tag: n.data[1] for n in children}
            plays = self.plays
            wins = self.wins

            # Selection
            noused_set = self.select_noused_node(availables, player)
            if not noused_set:
                # Every available move has statistics: choose by UCB.
                log_total = log(
                    sum(plays[(player, move)] for move in availables))
                _, move = max(
                    ((wins[(player, move)] / plays[(player, move)]) +
                     sqrt(self.confidence * log_total /
                          plays[(player, move)]), move)
                    for move in availables)
                for n in children:
                    if n.tag == (player, move):
                        node = n
                        break
            else:
                # Expansion: try a random move that has not been used yet.
                random.shuffle(noused_set)
                move = noused_set.pop()
                new_node = Node(tag=(player, move), data=[0, 0])
                tree.add_node(new_node, parent=node)
                node = new_node

            board.draw_xy(move[0], move[1], player)

            availables = board.get_k_dist_empty_tuple(2)
            is_full = not len(availables)
            winner = board.anyone_win(move[0], move[1])
            # Compare by value; an identity test against the EMPTY constant
            # is only correct by accident of object caching.
            if winner != EMPTY or is_full:
                break

            player = self.player_change(player)

        # Back-propagation from the last visited node up to the head node.
        while not node.is_root():
            if winner == self.player:
                node.data[1] += 1
            node.data[0] += 1
            node = tree.parent(node.identifier)

    def select_best_move(self):
        """Return the candidate move with the best win ratio at the root.

        The ratio of every candidate is nudged by ``closest_value`` so that
        ties are broken by board position. The best score is printed.
        """
        empty_set = self.board.get_k_dist_empty_tuple(2)

        children = self.MCTS_tree.children(0)
        self.plays = {n.tag: n.data[0] for n in children}
        self.wins = {n.tag: n.data[1] for n in children}

        ratio_to_win, move = max(
            (self.wins.get((self.player, move), 0) /
             self.plays.get((self.player, move), 1) +
             self.closest_value(move), move) for move in empty_set)
        print(ratio_to_win)
        return move

    def closest_value(self, move):
        """Return a small positional bonus computed from the move's coordinates."""
        x = move[0]
        y = move[1]
        return (abs((x - N_LINE + 1) * x) + abs((y - N_LINE + 1) * y)) * 0.0001

    def player_change(self, player):
        """Return the opponent of ``player``; other values are returned as is."""
        if player == BLACK:
            return WHITE
        if player == WHITE:
            return BLACK
        return player

    def select_noused_node(self, availables, player):
        """Return the available moves without recorded plays for ``player``."""
        return [
            move for move in availables
            if not self.plays.get((player, move))
        ]
class StepParse:
    """Extract the assembly structure of a STEP file.

    ``load_step`` collects the product and assembly-usage entities of the
    file, ``create_tree`` turns them into a tree of named parts, and
    ``get_levels`` / ``create_lattice`` derive level counts and a directed
    graph with plot positions from that tree.
    """

    def __init__(self):
        pass

    @staticmethod
    def _hash_refs(text, separators):
        """Return the whitespace-separated tokens of ``text`` starting with '#'.

        Every character in ``separators`` is replaced by a space first, so
        references written without surrounding spaces still split cleanly.
        """
        for sep in separators:
            text = text.replace(sep, " ")
        return [token for token in text.split() if token.startswith('#')]

    @staticmethod
    def _entity_type(line):
        """Classify an indexed STEP line; return '' when it is not of interest.

        The order of the checks matters: 'PRODUCT_DEFINITION' followed by a
        space or '(' must be tested before the plain 'PRODUCT' patterns.
        """
        if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
            return 'nauo'
        if 'PRODUCT_DEFINITION ' in line or 'PRODUCT_DEFINITION(' in line:
            return 'prod_def'
        if 'PRODUCT_DEFINITION_FORMATION' in line:
            return 'prod_def_form'
        if 'PRODUCT ' in line or 'PRODUCT(' in line:
            return 'prod'
        return ''

    def load_step(self, step_filename):
        """Read a STEP file and collect its product structure references.

        Entities wrapped over several physical lines are joined before they
        are stored. Afterwards the '#'-references of each entity are
        extracted and matched up down to the product names.

        Arguments:
            step_filename -- path of the STEP file to read.

        Sets, among others, ``nauo_refs``, ``prod_all_refs``, ``parent_refs``,
        ``child_refs``, ``root_type_refs`` and ``part_dict`` (reference of a
        product definition -> product name).
        """
        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []

        self.filename = os.path.splitext(step_filename)[0]

        collected = {
            'nauo': self.nauo_lines,
            'prod_def': self.prod_def_lines,
            'prod_def_form': self.prod_def_form_lines,
            'prod': self.prod_lines,
        }

        def flush(held, kind):
            # Store a completed entity; text held without a known type
            # (e.g. header lines) is simply dropped.
            if held and kind in collected:
                collected[kind].append(held)

        line_hold = ''
        line_type = ''

        with open(step_filename) as f:
            for line in f:
                if re.search("#(.*)=", line):
                    # A new indexed entity starts: the held one is complete.
                    flush(line_hold, line_type)
                    line_type = self._entity_type(line)
                    line_hold = line.rstrip() if line_type else ''
                elif 'ENDSEC;' in line:
                    # End of a section: nothing more can follow the held line.
                    flush(line_hold, line_type)
                    line_hold = ''
                    line_type = ''
                else:
                    # Continuation of a wrapped entity.
                    line_hold = line_hold + line.rstrip()

        # A file that ends without 'ENDSEC;' must not lose its last entity.
        flush(line_hold, line_type)

        # Find all (# hashed) line references and product names. ',' and '='
        # become spaces so references split even without blanks between them.
        self.nauo_refs = [self._hash_refs(el, ",=") for el in self.nauo_lines]
        self.prod_def_refs = [
            self._hash_refs(el, ",=") for el in self.prod_def_lines
        ]
        self.prod_def_form_refs = [
            self._hash_refs(el, ",=") for el in self.prod_def_form_lines
        ]
        self.prod_refs = []
        for el in self.prod_lines:
            refs = self._hash_refs(el, ",(=")
            # The product name is the first quoted string of the entity.
            refs.append(el.split("'")[1])
            self.prod_refs.append(refs)

        # Get first two items in each sublist (as third is shape ref):
        # the 'PRODUCT_DEFINITION' ref and the
        # 'PRODUCT_DEFINITION_FORMATION' ref.
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for el_ in self.prod_all_refs:
            # Add the 'PRODUCT' ref found via the formation entity
            for el in self.prod_def_form_refs:
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break
            # Add the name found on the 'PRODUCT' entity
            for el in self.prod_refs:
                if el[0] == el_[2]:
                    el_.append(el[2])
                    break

        # Find all parent and child relationships
        # (2nd and 3rd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        # Root node(s): parents that are nobody's child
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}

    def show_values(self):
        """Print the collected lines and references (basic testing aid)."""
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

    def create_tree(self):
        """Build ``self.tree`` from the references found by ``load_step``.

        Node identifiers are consecutive integers starting at 0 for the
        root; ``self.tree_dict`` maps them back to the STEP references. Each
        node is tagged with the part name and carries the reference in
        ``data['ref']``. When there are no parts the tree stays empty and
        nothing else is computed; otherwise ``get_levels`` is run afterwards.
        """
        self.tree = Tree()

        # Check if there are any parts to make a tree from
        if not self.part_dict:
            return

        root_node_ref = list(self.root_type_refs)[0]
        self.tree.create_node(self.part_dict[root_node_ref],
                              0,
                              data={'ref': root_node_ref})

        # Each node needs a unique identifier; a one-element list lets the
        # recursive helper advance the shared counter.
        next_id = [0]
        self.tree_dict = {0: root_node_ref}

        def add_children(parent_id):
            parent_ref = self.tree_dict[parent_id]
            for line in self.nauo_refs:
                if line[1] == parent_ref:
                    next_id[0] += 1
                    child_id = next_id[0]
                    self.tree_dict[child_id] = str(line[2])
                    self.tree.create_node(self.part_dict[line[2]],
                                          child_id,
                                          parent=parent_id,
                                          data={'ref': str(line[2])})
                    # Depth first: finish this child's subtree before
                    # moving on to its siblings.
                    add_children(child_id)

        add_children(0)

        self.appended = False

        self.get_levels()

    def get_levels(self):
        """Compute the part/assembly counts of every node and build the lattice.

        ``self.levels[node_id]`` holds ``n_p`` and ``n_a``. A leaf gets
        ``part_level`` (1) for both; an assembly gets the sum of its
        children's ``n_p`` and the sum of its children's ``n_a`` plus one.
        The distinct assembly values are gathered in ``levels_set_p`` /
        ``levels_set_a`` and the inverse mappings (level -> node ids) in
        ``levels_p_inv`` / ``levels_a_inv``.
        """
        # Initialise dict and get first level (leaves)
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [el.identifier for el in self.tree.leaves()]
        self.all_ids = list(self.tree.nodes)
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        leaf_lookup = set(self.leaf_ids)

        # Group the node ids by tree depth once instead of rescanning the
        # whole tree for every level; the order within a level is kept.
        ids_by_depth = {}
        for node_id in self.tree.nodes:
            ids_by_depth.setdefault(self.tree.level(node_id),
                                    []).append(node_id)

        # Go up through tree levels so children are always done first
        for depth in range(self.tree.depth(), -1, -1):
            for node_id in ids_by_depth.get(depth, []):
                if node_id in leaf_lookup:
                    self.levels[node_id] = {
                        'n_p': self.part_level,
                        'n_a': self.part_level,
                    }
                    continue
                # Assembly: sum over all direct children
                child_sum_p = 0
                child_sum_a = 0
                for child_id in self.tree.is_branch(node_id):
                    child_sum_p += self.levels[child_id]['n_p']
                    child_sum_a += self.levels[child_id]['n_a']
                self.levels[node_id] = {
                    'n_p': child_sum_p,
                    'n_a': child_sum_a + 1,
                }
                self.levels_set_p.add(child_sum_p)
                self.levels_set_a.add(child_sum_a + 1)

        self.create_lattice()

        self.levels_p_sorted = sorted(self.levels_set_p)
        self.levels_a_sorted = sorted(self.levels_set_a)

        def get_levels_inv(list_in, key):
            # Dictionary of item IDs for each lattice level
            levels_inv = {self.part_level: []}
            for el in list_in:
                levels_inv[el] = []
            for k, v in self.levels.items():
                levels_inv[v[key]].append(k)
            return levels_inv

        self.levels_p_inv = get_levels_inv(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = get_levels_inv(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):
        """Return the identifiers of all descendants of ``id_``.

        The result is in breadth-first order: direct children first, then
        their children, and so on.
        """
        descendants = [el.identifier for el in self.tree.children(id_)]
        # The list doubles as the work queue: every identifier appended is
        # visited in turn until no unvisited descendant is left.
        position = 0
        while position < len(descendants):
            descendants.extend(
                el.identifier
                for el in self.tree.children(descendants[position]))
            position += 1
        return descendants

    def create_lattice(self):
        """Build the directed graph ``self.g`` mirroring the tree.

        Node ids are the same as in the tree; every node stores its parent
        id (-1 for the root), label and colour, and every edge points from
        child to parent. Leaves are placed on row 1 at evenly spaced x
        positions; every assembly is placed at the mean x of its children,
        with its ``n_a`` value as y.
        """
        self.g = nx.DiGraph()
        self.default_colour = 'r'

        # Root node: parent set to -1 to maintain data type of "parent"
        node_id = self.tree.root
        label_text = self.tree.get_node(node_id).tag
        self.g.add_node(node_id,
                        parent=-1,
                        label=label_text,
                        colour=self.default_colour)

        # Do nodes from treelib "nodes" dictionary, excluding the root
        for key in self.tree.nodes:
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                label_text = self.tree.get_node(key).tag
                self.g.add_node(key,
                                parent=parent_id,
                                label=label_text,
                                colour=self.default_colour)

        # Do edges from nodes, excluding the root
        for key in self.tree.nodes:
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                self.g.add_edge(key, parent_id)

        # Escape if only one node: it has no parent to position it by
        if self.tree.size() == 1:
            id_ = [el.identifier for el in self.tree.leaves()]
            self.g.nodes[id_[-1]]['pos'] = (0, 0)
            return

        # Get set of parents of leaf nodes
        leaf_parents = set(
            self.tree.parent(el).identifier for el in self.leaf_ids)

        # For each leaf parent, set position of its leaf nodes sequentially
        tree_leaf_ids = set(el.identifier for el in self.tree.leaves())
        no_leaves = len(tree_leaf_ids)
        i = 0
        for el in leaf_parents:
            for el_ in self.tree.is_branch(el):
                if el_ in tree_leaf_ids:
                    self.g.nodes[el_]['pos'] = ((i / (no_leaves)), 1)
                    i += 1

        # Traverse upwards from leaves, lowest assembly level first, so the
        # children of a node are always positioned before the node itself
        for el in sorted(self.levels_set_a):
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == el]
            for el_ in node_ids:
                child_ids = self.tree.is_branch(el_)
                pos_sum = 0
                for el__ in child_ids:
                    pos_sum += self.g.nodes[el__]['pos'][0]
                pos_sum = pos_sum / len(child_ids)
                self.g.nodes[el_]['pos'] = (pos_sum, el)

    def print_tree(self):
        """Show the tree, building it first when showing it fails."""
        try:
            self.tree.show()
        except Exception:
            # Typically the tree has not been created yet. Unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit through.
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        """Return the tree in JSON format, optionally saving it to a file.

        Arguments:
            save_to_file -- when true, also write the JSON to disk.
            filename -- file name without the '.json' extension.
            path -- optional directory for the file.

        Returns:
            The JSON string, or None (after printing a message) when the
            tree is empty.
        """
        if self.tree.size() == 0:
            print("no tree to print")
            return None

        data = self.tree.to_json()
        if save_to_file:
            file_path = os.path.join(path, filename) if path else filename
            with open(file_path + '.json', 'w') as outfile:
                json.dump(json.loads(data), outfile)
        return data
class LuaDec: def __init__(self, fileName, format = "luadec"): self.format = format self.ptr = 0 self.pc = 0 self.tree = Tree() self.readFile(fileName) self.readHeader() self.readFunction() #self.tree.show() def readFile(self, fileName): f = open(fileName, "rb") self.fileBuf = f.read() f.close() def readUInt32(self): result = struct.unpack("<I", self.fileBuf[self.ptr:self.ptr + 4])[0] self.ptr += 4 return result def readUInt64(self): result = struct.unpack("<Q", self.fileBuf[self.ptr:self.ptr + 8])[0] self.ptr += 8 return result def formatValue(self, val): if type(val) == str: return "\"{}\"".format(val) elif type(val) == bool: if val: return "true" else: return "false" elif val is None: return "nil" elif type(val) == float and int(val) == val: return int(val) else: return val def processUpvalue(self, i, funcName): if i[0] == 1: if funcName == "root": return "G" return "UR{}".format(i[1]) elif i[0] == 0: pNode = self.tree.parent(funcName) result = self.processUpvalue(pNode.data['upvalues'][i[1]], pNode.identifier) if result[-1] != "G": return "U" + result else: return result else: raise Exception("Unexpected upvalue {}".format(i[0])) def readHeader(self): magic = self.fileBuf[:4] if magic != b"\x1bLua": raise Exception("Unknown magic: {0}".format(magic.hex())) version = self.fileBuf[4] if version != 82: raise Exception("This program support ONLY Lua 5.2") lua_tail = self.fileBuf[12:18] if lua_tail != b"\x19\x93\r\n\x1a\n": raise Exception("Unexcepted lua_tail value: {0}".format(lua_tail.hex())) self.ptr = 18 def readFunction(self, parent=None): #处理tree if parent: funcName = "function" funcSuffix = [] #强烈谴责py不支持do...while #别问我这堆东西怎么工作的,it just works!! 
pNode = self.tree.get_node(parent).identifier funcSuffix.append("_{0}".format(len(self.tree.children(pNode)))) while self.tree.parent(pNode): pNode = self.tree.parent(pNode).identifier funcSuffix.append("_{0}".format(len(self.tree.children(pNode)) - 1)) funcSuffix.reverse() for i in funcSuffix: funcName += i else: funcName = "root" #self.tree.show() #ProtoHeader protoheader = struct.unpack("<IIccc", self.fileBuf[self.ptr:self.ptr + 11]) self.ptr += 11 lineDefined = protoheader[0] lastLineDefined = protoheader[1] numParams = ord(protoheader[2]) is_vararg = ord(protoheader[3]) maxStackSize = ord(protoheader[4]) #Code sizeCode = self.readUInt32() instructions = [] #print("Code total size: {0}".format(sizeCode)) for i in range(sizeCode): ins = self.readUInt32() instructions.append(ins) #self.processInstruction(ins) #print("Instruction: {0}".format(hex(ins))) #Constants sizeConstants = self.readUInt32() constants = [] #print("Constants total size: {0}".format(sizeConstants)) for i in range(sizeConstants): const_type = self.fileBuf[self.ptr] self.ptr += 1 if const_type == const.LUA_DATATYPE['LUA_TNIL']: const_val = None const_type = "nil" elif const_type == const.LUA_DATATYPE['LUA_TNUMBER']: #lua的number=double(8 bytes) const_val = struct.unpack("<d", self.fileBuf[self.ptr:self.ptr + 8])[0] self.ptr += 8 const_type = "number" elif const_type == const.LUA_DATATYPE['LUA_TBOOLEAN']: const_val = bool(self.fileBuf[self.ptr]) self.ptr += 1 const_type = "bool" elif const_type == const.LUA_DATATYPE['LUA_TSTRING']: str_len = self.readUInt32() buf = self.fileBuf[self.ptr:self.ptr + str_len - 1] try: const_val = str(buf, encoding="utf8") except UnicodeDecodeError: const_val = "" for i in buf: const_val += "\\{}".format(i) self.ptr += str_len const_type = "string" if self.fileBuf[self.ptr - 1] != 0: raise Exception("Bad string") else: raise Exception("Undefined constant type {0}.".format(hex(const_type))) constants.append([const_val, const_type]) #print("Constant: 
{0}".format(const_val)) #Skip Protos ptrBackupStart = self.ptr #备份protos的位置,先处理后面的upvalue等东西 sizeProtos = self.readUInt32() for i in range(sizeProtos): self.skipFunction() #Upvalue sizeUpvalue = self.readUInt32() upvalues = [] #print("Upvalue total size: {0}".format(sizeUpvalue)) for i in range(sizeUpvalue): instack = self.fileBuf[self.ptr] idx = self.fileBuf[self.ptr + 1] self.ptr += 2 upvalues.append([instack, idx]) #print("Upvalue: {0} {1}".format(instack, idx)) #srcName sizeSrcName = self.readUInt32() #print("srcName size: {0}".format(sizeSrcName)) if sizeSrcName > 0: srcName = str(self.fileBuf[self.ptr:self.ptr + sizeSrcName], encoding="utf8") self.ptr += sizeSrcName #print("srcName: " + srcName) #Lines sizeLines = self.readUInt32() self.ptr += sizeLines #LocVars sizeLocVars = self.readUInt32() #for i in sizeLocVars: # varname_size = #TODO: sizeLocVars不为0的情况(未strip) #UpvalNames sizeUpvalNames = self.readUInt32() #将内容写入tree data = { "instructions": instructions, "constants": constants, "upvalues": upvalues, } self.tree.create_node(funcName, funcName, parent=parent, data=data) if self.format == "luaasm": print("\n.fn(R{}{})".format(numParams, ", __va_args__" if is_vararg else "")) print("; {:<20s}{}".format("Function", funcName)) print("; {:<20s}{}".format("Defined from line", lineDefined)) print("; {:<20s}{}".format("Defined to line", lastLineDefined)) print("; {:<20s}{}".format("#Upvalues", sizeUpvalue)) print("; {:<20s}{}".format("#Parameters", numParams)) print("; {:<20s}{}".format("Is_vararg", is_vararg)) if self.format == "luaasm": print("; {:<20s}{}".format("Max Stack Size", maxStackSize)) else: print("; {:<20s}{}\n".format("Max Stack Size", maxStackSize)) #生成一个Upvalue和Constant的拼接表 fmtVals = {} count = 0 for i in data['constants']: fmtVals["K{}".format(count)] = self.formatValue(i[0]) count += 1 count = 0 for i in data['upvalues']: fmtVals["U{}".format(count)] = self.processUpvalue(i, funcName) count += 1 if self.format == "luadec": #处理单个指令 self.pc = 0 
self.currFunc = funcName self.fmtVals = fmtVals for i in data['instructions']: self.processInstruction(i) self.pc += 1 if self.format == "luadec": print("\n") if self.format == "luaasm": print("\n.instruction") #处理单个指令 self.pc = 0 self.currFunc = funcName self.fmtVals = fmtVals for i in data['instructions']: self.processInstruction(i) self.pc += 1 if self.format == "luaasm": print("\n.const") else: print("\n; Constants") count = 0 for i in data['constants']: print("K{:<5s} = {}".format(str(count), self.formatValue(i[0]))) count += 1 if self.format == "luaasm": print("\n.upvalue") else: print("\n; Upvalues") count = 0 for i in data['upvalues']: if self.format == "luaasm": print("U{:<5s} = L{} R{}".format(str(count), i[0], i[1])) else: print("{:>5s}\t{}\t{}".format(str(count), i[0], i[1])) count += 1 #Proto ptrBackupEnd = self.ptr self.ptr = ptrBackupStart sizeProtos = self.readUInt32() #print("Protos total size: {0}".format(sizeProtos)) for i in range(sizeProtos): self.readFunction(parent=funcName) self.ptr = ptrBackupEnd if self.format == "luaasm": print(".endfn\n") #跳过函数,用于需要获取后面的指针位置的情况 def skipFunction(self): #print("Start skipping Proto, current ptr at {0}".format(hex(self.ptr))) #ProtoHeader self.ptr += 11 #Code sizeCode = self.readUInt32() for i in range(sizeCode): self.ptr += 4 #Constants sizeConstants = self.readUInt32() for i in range(sizeConstants): const_type = self.fileBuf[self.ptr] self.ptr += 1 if const_type == const.LUA_DATATYPE['LUA_TNIL']: pass elif const_type == const.LUA_DATATYPE['LUA_TNUMBER']: self.ptr += 8 elif const_type == const.LUA_DATATYPE['LUA_TBOOLEAN']: self.ptr += 1 elif const_type == const.LUA_DATATYPE['LUA_TSTRING']: str_len = self.readUInt32() self.ptr += str_len else: raise Exception("Undefined constant type {0}.".format(hex(const_type))) #Protos sizeProtos = self.readUInt32() for i in range(sizeProtos): self.skipFunction() #Upvalue sizeUpvalue = self.readUInt32() for i in range(sizeUpvalue): self.ptr += 2 #srcName sizeSrcName = 
self.readUInt32() if sizeSrcName > 0: self.ptr += sizeSrcName #Lines sizeLines = self.readUInt32() self.ptr += sizeLines #LocVars sizeLocVars = self.readUInt32() #for i in sizeLocVars: # varname_size = #TODO: sizeLocVars不为0的情况(未strip) #UpvalNames sizeUpvalNames = self.readUInt32() #print("End skipping Proto. Current ptr at {0}".format(hex(self.ptr))) def getExtraArg(self): next_ins = self.tree.get_node(self.currFunc).data['instructions'][self.pc + 1] opCode = next_ins % (1 << 6) if const.opCode[opCode] == "OP_EXTRAARG": Ax = (next_ins >> 6) return True, Ax else: return False, "ERROR: C == 0 but no OP_EXTRAARG followed." def processInstruction(self, ins): opCode = ins % (1 << 6) opMode = const.opMode[opCode] A = 0 B = 0 C = 0 if opMode[4] == "iABC": A = (ins >> 6 ) % (1 << 8) B = (ins >> 23)#% (1 << 9) C = (ins >> 14) % (1 << 9) elif opMode[4] == "iABx": A = (ins >> 6 ) % (1 << 8) B = (ins >> 14)#% (1 << 18) elif opMode[4] == "iAsBx": A = (ins >> 6 ) % (1 << 8) B = (ins >> 14) - (1 << 17) + 1 elif opMode[4] == "iAx": A = (ins >> 6 )#% (1 << 26) else: raise Exception("Unknown opMode {0}".format(opMode[4])) #format A if opMode[1] == 1: parsedA = "R{0}".format(A) elif opMode[1] == 0: if const.opCode[opCode] == "OP_SETTABUP": parsedA = "U{0}".format(A) elif const.opCode[opCode] in ["OP_EQ", "OP_LT", "OP_LE"]: parsedA = A else: parsedA = "R{0}".format(A) else: raise Exception("Unknown A Mode {0}".format(opMode[1])) #format B if opMode[2] == 1: if const.opCode[opCode].find("UP") >= 0: parsedB = "U{0}".format(B) else: parsedB = "{0}".format(B) elif opMode[2] == 0: parsedB = "" elif opMode[2] == 2 or opMode[2] == 3: if opMode[4] == "iAsBx": #B为sBx的时候,只有可能是立即数而不是寄存器 parsedB = "{0}".format(B) elif const.opCode[opCode] == "OP_LOADK": #LOADK一定是读Kx而不是Rx parsedB = "K{0}".format(B) elif B < 0x100: parsedB = "R{0}".format(B) else: parsedB = "K{0}".format(B - 0x100) B -= 0x100 else: raise Exception("Unknown B Mode {0}".format(opMode[2])) #format C if opMode[3] == 1: if 
const.opCode[opCode].find("UP") >= 0: parsedC = "U{0}".format(C) else: parsedC = "{0}".format(C) elif opMode[3] == 0: parsedC = "" elif opMode[3] == 2 or opMode[3] == 3: if C < 0x100: parsedC = "R{0}".format(C) else: parsedC = "K{0}".format(C - 0x100) C -= 0x100 else: raise Exception("Unknown C Mode {0}".format(opMode[3])) # parse comment #先用模板拼接 if len(parsedB) > 0 and (parsedB[0] == 'K' or parsedB[0] == 'U'): parsedB_ = "{{{}}}".format(parsedB) else: parsedB_ = parsedB if len(parsedC) > 0 and (parsedC[0] == 'K' or parsedC[0] == 'U'): parsedC_ = "{{{}}}".format(parsedC) else: parsedC_ = parsedC comment = const.pseudoCode[opCode].format(A=A,B=B,C=C,PB=parsedB_,PC=parsedC_) #预处理 #if BForceK: # comment = comment.replace("R{}".format(B), "K{}".format(B)) #if const.opCode[opCode] == "OP_SETTABLE" and CForceK: # comment = comment.replace("R{}".format(C), "{{K{}}}".format(C)) #再处理Upvalue和Constants comment = comment.format(**self.fmtVals) #对部分需要处理的命令进行处理 if const.opCode[opCode] == "OP_LOADBOOL": #把0/1转换成false/true comment = comment[:-1] if B: comment += "true" else: comment += "false" #处理跳转 if C: comment += "; goto {0}".format(self.pc + 2) elif const.opCode[opCode] == "OP_LOADNIL": comment = "" for i in range(B + 1): comment += "R{0}, ".format(A + i) comment = comment[:-2] comment += " := nil" elif const.opCode[opCode] == "OP_SELF": comment = "R{}".format(A+1) + comment[2:] elif const.opCode[opCode] == "OP_JMP": comment += " (goto {0})".format(self.pc + 1 + B) elif const.opCode[opCode] in ["OP_EQ", "OP_LT", "OP_LE", "OP_TEST", "OP_TESTSET"]: if A: if const.opCode[opCode] == "OP_EQ": comment = comment.replace("==", "~=") elif const.opCode[opCode] == "OP_LT": comment = comment.replace("<", ">=") elif const.opCode[opCode] == "OP_LE": comment = comment.replace("<=", ">") comment += " goto {0} else goto {1}".format(self.pc + 2, self.pc + 1) if C == 0: comment = comment.replace("not ", "") elif const.opCode[opCode] == "OP_CALL": comment = "" for i in range(C - 1): comment += 
"R{}, ".format(A + i) if C > 1: comment = comment[:-2] + " := R{}(".format(A) elif C == 1: comment += " := R{}(".format(A) else: comment = "R{} to top := R{}(".format(A, A) for i in range(B - 1): comment += "R{}, ".format(A + i + 1) if B > 1: comment = comment[:-2] + ")" elif B == 1: comment += ")" else: comment += "R{} to top)".format(C) elif const.opCode[opCode] == "OP_TAILCALL": comment = "R{} to top := R{}(".format(A, A) for i in range(B - 1): comment += "R{}, ".format(A + i + 1) if B > 1: comment = comment[:-2] + ")" else: comment = comment + ")" elif const.opCode[opCode] == "OP_RETURN": for i in range(B - 1): comment += "R{}, ".format(A + i) if B > 1: comment = comment[:-2] elif B == 0: comment += "R{} to top".format(A) elif const.opCode[opCode] == "OP_FORLOOP": comment = comment.replace("RD", "R{}".format(A + 1)) comment = comment.replace("RE", "R{}".format(A + 2)) comment = comment.replace("RF", "R{}".format(A + 3)) comment += "goto {} end".format(self.pc + B + 1) elif const.opCode[opCode] == "OP_FORPREP": comment = comment.replace("RD", "R{}".format(A + 2)) comment += "(goto {})".format(self.pc + B + 1) elif const.opCode[opCode] == "OP_TFORCALL": comment = comment.replace("RD", "R{}".format(A + 1)) comment = comment.replace("RE", "R{}".format(A + 2)) comment = comment.replace("RF", "R{}".format(A + 3)) comment = comment.replace("RG", "R{}".format(A + 4)) elif const.opCode[opCode] == "OP_TFORLOOP": comment = comment.replace("RD", "R{}".format(A + 1)) comment += " (goto {}))".format(self.pc + B + 1) elif const.opCode[opCode] == "OP_CLOSURE": if self.currFunc == "root": comment += "function_{})".format(B) else: comment += self.currFunc + "_{})".format(B) elif const.opCode[opCode] == "OP_SETLIST": real_c = C err = False if C == 0: success, result = self.getExtraArg() if success: real_c = result else: comment += result err = True if not err: LFIELDS_PER_FLUSH = 50 start_index = (real_c - 1) * LFIELDS_PER_FLUSH if B == 0: comment += "R{}[{}] to R{}[top] := R{} 
to top".format(A, start_index, A, A + 1) elif B == 1: comment += "R{}[{}] := R{}".format(A, start_index, A + 1) else: comment += "R{}[{}] to R{}[{}] := R{} to R{}".format(A, start_index, A, start_index + B - 1, A + 1, A + B) if C == 0: comment += "; CONTAINS EXTRAARG" elif const.opCode[opCode] == "OP_LOADKX": success, result = self.getExtraArg() if success: Ax = result comment += "R{} := {{K{}}}".format(A, Ax).format(**self.fmtVals) else: comment += result seq = [] for i in [parsedA, parsedB, parsedC]: if i != "": seq.append(str(i)) regsFmt = " ".join(seq) if self.format == "luaasm": print("{:<10s}{:<13s} ; {:>5s} {}".format(const.opCode[opCode][3:], regsFmt, "[{}]".format(str(self.pc)), comment)) else: print("{:>5s} [-]: {:<10s}{:<13s}; {}".format(str(self.pc), const.opCode[opCode][3:], regsFmt, comment))
class RIAC(AbstractTeacher):
    """Teacher that keeps a tree of task-space regions and samples tasks by region ALP.

    Every tree node holds a ``Region`` with its bounds, its (reward, task)
    history (``r_t_pairs[0]`` are rewards, ``r_t_pairs[1]`` are tasks) and its
    ALP value. A region that reports it is full is split into two children.
    """

    def __init__(self, mins, maxs, seed, env_reward_lb, env_reward_ub, max_region_size=200,
                 alp_window_size=None, nb_split_attempts=50, sampling_in_leaves_only=False,
                 min_region_size=None, min_dims_range_ratio=1 / 6, discard_ratio=1 / 4):
        """Create the root region covering the whole task space.

        Arguments:
            mins, maxs -- lower / upper bounds of the task space.
            seed -- forwarded to ``AbstractTeacher``.
            env_reward_lb, env_reward_ub -- forwarded to ``AbstractTeacher``.
            max_region_size -- number of (task, reward) pairs a region holds before splitting.
            alp_window_size -- window used by ``compute_alp`` (defaults to ``max_region_size``).
            nb_split_attempts -- number of random splits tried by ``split``.
            sampling_in_leaves_only -- sample only in leaf regions instead of all regions.
            min_region_size -- minimum population of each child (defaults to ``max_region_size / 20``).
            min_dims_range_ratio -- minimum child extent per dimension, relative to the full range.
            discard_ratio -- share of the oldest points flushed when no split is valid.
        """
        AbstractTeacher.__init__(self, mins, maxs, env_reward_lb, env_reward_ub, seed)

        # Maximal number of (task, reward) pairs a region can hold before splitting
        self.maxlen = max_region_size
        self.alp_window = self.maxlen if alp_window_size is None else alp_window_size

        # Initialize Regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(self.mins, self.maxs, dtype=np.float32)]
        self.regions_alp = [0.]
        self.tree.create_node('root', 'root',
                              data=Region(maxlen=self.maxlen,
                                          r_t_pairs=[deque(maxlen=self.maxlen + 1),
                                                     deque(maxlen=self.maxlen + 1)],
                                          bounds=self.regions_bounds[-1],
                                          alp=self.regions_alp[-1]))
        self.nb_dims = len(mins)
        self.nb_split_attempts = nb_split_attempts

        # Whether task sampling uses parent and child regions (False) or only child regions (True)
        self.sampling_in_leaves_only = sampling_in_leaves_only

        # Additional tricks to original RIAC, enforcing splitting rules
        # 1 - Minimum population required for both children when splitting --> set to 1 to cancel
        self.minlen = self.maxlen / 20 if min_region_size is None else min_region_size

        # 2 - Minimum children region size (compared to initial range of each dimension)
        # Set min_dims_range_ratio to 1/np.inf to cancel
        self.dims_ranges = self.maxs - self.mins
        self.min_dims_range_ratio = min_dims_range_ratio

        # 3 - If after nb_split_attempts, no split is valid, flush oldest points of parent region
        # If 1- and 2- are canceled, this will be canceled since any split will be valid
        self.discard_ratio = discard_ratio

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_alps = []
        self.update_nb = -1
        self.split_iterations = []

        # Snapshot of the constructor arguments (also contains ``self``).
        self.hyperparams = locals()

    def compute_alp(self, sub_region):
        """Return the ALP of a region given as ``[rewards, tasks]``.

        The last ``alp_window`` rewards are cut in two parts and the absolute
        difference of their means is returned; 0 when there are 2 rewards or fewer.
        """
        if len(sub_region[0]) > 2:
            # The window may be only partially filled.
            cp_window = min(len(sub_region[0]), self.alp_window)
            half = int(cp_window / 2)
            first_half = np.array(sub_region[0])[-cp_window:-half]
            snd_half = np.array(sub_region[0])[-half:]
            diff = first_half.mean() - snd_half.mean()
            cp = np.abs(diff)
        else:
            cp = 0
        alp = np.abs(cp)
        return alp

    def split(self, nid):
        """Try ``nb_split_attempts`` random splits of region ``nid`` and keep the best one.

        Returns True when two child nodes were added to the tree. When no
        attempt produced valid bounds, the oldest ``discard_ratio`` share of
        the region's pairs is dropped instead and False is returned.
        """
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_bounds = None
        best_sub_regions = None
        best_alps = None
        is_split = False

        for _ in range(self.nb_split_attempts):
            sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
            sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]

            # Repeat until the two sub regions contain at least minlen of the mother region.
            # NOTE(review): this loop has no iteration cap; it relies on some
            # threshold eventually separating the stored tasks.
            while len(sub_reg1[0]) < self.minlen or len(sub_reg2[0]) < self.minlen:
                # decide on dimension
                dim = self.random_state.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]

                # A split is only valid if both children are wide enough on every dimension.
                min_extent = self.dims_ranges * self.min_dims_range_ratio
                valid_bounds = True
                if np.any(bounds1.high - bounds1.low < min_extent):
                    valid_bounds = False
                if np.any(bounds2.high - bounds2.low < min_extent):
                    valid_bounds = False

                # perform split in sub regions
                sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                for idx, task in enumerate(reg.r_t_pairs[1]):
                    if bounds1.contains(task):
                        sub_reg1[1].append(task)
                        sub_reg1[0].append(reg.r_t_pairs[0][idx])
                    else:
                        sub_reg2[1].append(task)
                        sub_reg2[0].append(reg.r_t_pairs[0][idx])
                sub_regions = [sub_reg1, sub_reg2]

            # compute alp
            alp = [self.compute_alp(sub_reg1), self.compute_alp(sub_reg2)]

            # Score with the children's populations: len(sub_reg) is the
            # [rewards, tasks] container and is always 2.
            split_score = len(sub_reg1[0]) * len(sub_reg2[0]) * np.abs(alp[0] - alp[1])
            if split_score >= best_split_score and valid_bounds:
                is_split = True
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds
                # Remember the ALPs of the winning attempt, not of the last one tried.
                best_alps = alp

        if is_split:
            # add new nodes to tree
            for r_t_pairs, bounds, child_alp in zip(best_sub_regions, best_bounds, best_alps):
                self.tree.create_node(identifier=self.tree.size(), parent=nid,
                                      data=Region(self.maxlen, r_t_pairs=r_t_pairs,
                                                  bounds=bounds, alp=child_alp))
        else:
            # No valid split: flush the oldest points of the region.
            assert len(reg.r_t_pairs[0]) == (self.maxlen + 1)
            first_kept = int(self.maxlen * self.discard_ratio)
            reg.r_t_pairs[0] = deque(islice(reg.r_t_pairs[0], first_kept, self.maxlen + 1))
            reg.r_t_pairs[1] = deque(islice(reg.r_t_pairs[1], first_kept, self.maxlen + 1))

        return is_split

    def add_task_reward(self, node, task, reward):
        """Recursively add ``(task, reward)`` to ``node`` and to the children containing it.

        Visited node ids are appended to ``self.nodes_to_recompute``; ids of
        regions whose ``add`` asks for a split go to ``self.nodes_to_split``.
        """
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task):  # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children:  # if task in region, task is in one sub-region
                self.add_task_reward(n, task, reward)

            need_split = reg.add(task, reward, not children)  # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)

    def episodic_update(self, task, reward, is_success):
        """Record one episode outcome, split a region if needed and refresh ALPs.

        Returns ``(new_split, None)`` where ``new_split`` tells whether a split
        was executed. ``is_success`` is not used here.
        """
        self.update_nb += 1

        # Add new (task, reward) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_reward(root, task, reward)  # Will update self.nodes_to_split if needed
        assert len(self.nodes_to_split) <= 1

        # Split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])  # Execute the split
            if new_split:
                # Update list of regions_bounds
                if self.sampling_in_leaves_only:
                    self.regions_bounds = [n.data.bounds for n in self.tree.leaves()]
                else:
                    self.regions_bounds = [n.data.bounds for n in self.tree.all_nodes()]

        # Recompute ALPs of modified nodes
        for nid in self.nodes_to_recompute:
            node = self.tree.get_node(nid)
            reg = node.data
            reg.alp = self.compute_alp(reg.r_t_pairs)

        # Collect regions data (regions' ALP and regions' (task, reward) pairs)
        all_nodes = self.tree.all_nodes() if not self.sampling_in_leaves_only else self.tree.leaves()
        self.regions_alp = []
        self.r_t_pairs = []
        for n in all_nodes:
            self.regions_alp.append(n.data.alp)
            self.r_t_pairs.append(n.data.r_t_pairs)

        # Book-keeping
        if new_split:
            self.all_boxes.append(copy.copy(self.regions_bounds))
            self.all_alps.append(copy.copy(self.regions_alp))
            self.split_iterations.append(self.update_nb)
        assert len(self.regions_alp) == len(self.regions_bounds)

        return new_split, None

    def sample_random_task(self):
        """Sample a task from the first stored region bounds."""
        return self.regions_bounds[0].sample()  # First region is root region

    def sample_task(self):
        """Sample the next task, store it in ``sampled_tasks`` and return it as float32."""
        mode = self.random_state.rand()
        if mode < 0.1:
            # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                self.sampled_tasks.append(self.sample_random_task())
            else:
                self.sampled_tasks.append(self.non_exploratory_task_sampling()["task"])
        elif mode < 0.3:
            # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.sample_random_task())
        else:
            # "mode 1" (70%) -> proportional sampling on regions based on ALP
            # and then random task in selected region
            region_id = proportional_choice(self.regions_alp, self.random_state, eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1].astype(np.float32)

    def non_exploratory_task_sampling(self):
        """Pick a region by ALP and return a mutation of its lowest-reward task.

        Returns a dict with the ``"task"`` and an ``"infos"`` dict holding the
        index of the latest book-keeping entry and the chosen region id.
        """
        # 1 - Sample region proportionally to its ALP
        region_id = proportional_choice(self.regions_alp, self.random_state, eps=0.0)

        # 2 - Retrieve (task, reward) pair with lowest reward
        worst_task_idx = np.argmin(self.r_t_pairs[region_id][0])

        # 3 - Mutate task by a small amount (using Gaussian centered on task, with 0.1 std)
        task = self.random_state.normal(self.r_t_pairs[region_id][1][worst_task_idx].copy(), 0.1)

        # clip to stay within region (add small epsilon to avoid falling in multiple regions)
        task = np.clip(task,
                       self.regions_bounds[region_id].low + 1e-5,
                       self.regions_bounds[region_id].high - 1e-5)
        return {
            "task": task,
            "infos": {
                "bk_index": len(self.all_boxes) - 1,
                "task_infos": region_id
            }
        }

    def dump(self, dump_dict):
        """Store the split history in ``dump_dict`` and return it."""
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_alps'] = self.all_alps
        # dump_dict['riac_params'] = self.hyperparams
        return dump_dict

    @property
    def nb_regions(self):
        """Number of regions currently used for sampling."""
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        """Bounds of the regions currently used for sampling."""
        return self.regions_bounds
class PathList:
    """In-memory tree of a remote disk's files, keyed by file id and tagged by file name."""

    # Timestamp layout used by the 'created_at' / 'updated_at' listing fields.
    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    def __init__(self, disk):
        """Create a tree holding only the 'root' node; ``disk`` provides ``get_file_list``."""
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')
        # Default number of directory levels fetched below the listed folder.
        self.depth = 3

    def _make_file_info(self, item):
        """Build a ``FileInfo`` from one entry of a file listing."""
        is_file = item['type'] == 'file'
        fields = dict(name=item['name'],
                      id=item['file_id'],
                      pid=item['parent_file_id'],
                      type=is_file,
                      ctime=time.strptime(item['created_at'], self._TIME_FORMAT),
                      update_time=time.strptime(item['updated_at'], self._TIME_FORMAT),
                      hidden=item['hidden'])
        if is_file:
            # These fields are only read for file entries.
            fields.update(category=item['category'],
                          size=item['size'],
                          content_hash_name=item['content_hash_name'],
                          content_hash=item['content_hash'],
                          download_url=item['download_url'])
        return FileInfo(**fields)

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        """Fetch the listing of ``file_id`` and merge it into the tree.

        Arguments:
            file_id -- folder id, or a path when ``is_fid`` is False.
            depth -- how many further directory levels to descend (default: ``self.depth``).
            is_fid -- whether ``file_id`` is already an id.

        Returns False when the path cannot be resolved or the listing has no
        'items' key, True otherwise.
        """
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, auto_update=False)
            if not file_id:
                # Unknown path: nothing to list.
                return False
        file_list = self._disk.get_file_list(file_id)
        if 'items' not in file_list:
            return False
        for item in file_list['items']:
            file_info = self._make_file_info(item)
            if self._tree.get_node(file_info.id):
                # Refresh the entry itself (not its parent folder).
                self._tree.update_node(file_info.id, tag=file_info.name, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name, identifier=file_info.id,
                                       data=file_info, parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root', auto_update=True):
        """Print the subtree below ``path``; raise if the path is unknown."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        self._tree.show(file_id)

    def get_path_list(self, path, auto_update=True):
        """Return the ``FileInfo`` entries for ``path`` (see ``get_fid_list``)."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        return self.get_fid_list(file_id, auto_update=auto_update)

    def get_fid_list(self, file_id, auto_update=True):
        """Return ``[info]`` for a file, or the infos of a folder's children.

        Raises ``Exception`` when ``file_id`` is falsy.
        """
        self.auto_update_path_list(auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [node.data for node in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', auto_update=True):
        """Resolve ``path`` (relative to node ``file_id``) to a file id.

        Returns 'root' for the root spellings, the id of the last path
        component when every component is found, and False otherwise.
        """
        self.auto_update_path_list(auto_update)
        path = Path(path)
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        parts = [part for part in path.as_posix().split('/') if part]
        if not parts:
            return False
        for part in parts:
            for child in self._tree.children(file_id):
                if part == child.tag:
                    file_id = child.identifier
                    break
            else:
                # A missing component means the whole path is unknown; do not
                # keep matching later components against the same folder.
                return False
        return file_id

    def get_path_node(self, path, auto_update=True):
        """Return the tree node for ``path``, or False when it is unknown."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, auto_update=True):
        """Return the parent node of ``path``, or False when there is none."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, auto_update=True):
        """Populate the tree on first use (only while it holds just the root)."""
        if auto_update and len(self._tree) == 1:
            return self.update_path_list()
class PathList:
    """In-memory tree of a remote disk's files, rebuilt from the disk on every lookup."""

    # Timestamp layout used by the 'created_at' / 'updated_at' listing fields.
    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    def __init__(self, disk):
        """Create a tree holding only the 'root' node; ``disk`` provides ``get_file_list``."""
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')

    def _make_file_info(self, item):
        """Build a ``FileInfo`` from one entry of a file listing."""
        is_file = item['type'] == 'file'
        fields = dict(name=item['name'],
                      id=item['file_id'],
                      pid=item['parent_file_id'],
                      type=is_file,
                      ctime=time.strptime(item['created_at'], self._TIME_FORMAT),
                      update_time=time.strptime(item['updated_at'], self._TIME_FORMAT),
                      hidden=item['hidden'])
        if is_file:
            # These fields are only read for file entries.
            fields.update(category=item['category'],
                          size=item['size'],
                          content_hash_name=item['content_hash_name'],
                          content_hash=item['content_hash'],
                          download_url=item['download_url'])
        return FileInfo(**fields)

    def _refresh(self):
        """Reload the listing: reset a populated tree, then fetch from the root."""
        size = len(self._tree)
        if size > 1:
            # Start again from a tree that only contains the root node.
            self.__init__(self._disk)
        if size >= 1:
            self.update_path_list()

    def update_path_list(self, path='root', depth=3):
        """Fetch the listing of folder id ``path`` and add its entries to the tree.

        Directories are followed recursively for ``depth`` further levels.
        """
        for item in self._disk.get_file_list(path)['items']:
            file_info = self._make_file_info(item)
            self._tree.create_node(tag=file_info.name, identifier=file_info.id,
                                   data=file_info, parent=path)
            if not file_info.type and depth:
                self.update_path_list(path=file_info.id, depth=depth - 1)

    def tree(self, path):
        """Print the subtree below ``path``; raise if the path is unknown."""
        # get_path_fid already reloads the listing, so no separate refresh is
        # needed here (it used to be fetched twice per call).
        file_id = self.get_path_fid(path)
        if not file_id:
            raise Exception('No such file or directory')
        self._tree.show(file_id)

    def get_path_list(self, path):
        """Return ``[info]`` for a file, or the infos of a folder's children.

        Raises ``Exception`` when ``path`` cannot be resolved.
        """
        file_id = self.get_path_fid(path)
        if not file_id:
            raise Exception('No such file or directory')
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [node.data for node in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root'):
        """Reload the listing and resolve ``path`` (relative to ``file_id``) to a file id.

        Returns 'root' for '/', '' and 'root', the id of the last path
        component when every component is found, and False otherwise.
        """
        self._refresh()
        if path == '/' or path == '' or path == 'root':
            return 'root'
        parts = [part for part in path.split('/') if part]
        if not parts:
            return False
        for part in parts:
            for child in self._tree.children(file_id):
                if part == child.tag:
                    file_id = child.identifier
                    break
            else:
                # A missing component means the whole path is unknown; do not
                # keep matching later components against the same folder.
                return False
        return file_id
class MonteCarlo:
    """Tree search over an engine's available actions, estimating an EV per root action.

    Every tree node's ``data`` dict describes one action ('action', 'seat',
    'stats', 'divider', 'cum_stats', optional 'amount') together with its
    running 'ev' and 'traversed' counters.
    """

    N_THREADS = 1
    PERCENTILE = 100

    def __init__(self, engine=None, hero=None):
        """Start from ``engine`` when given, otherwise load it from the engine file."""
        self.ev_history = {}
        self.time_start = None
        self.duration = None
        self.queue = None
        self.leaf_path = None
        # Always defined so load_engine() also works after starting from a given engine.
        self.engine_checksum = None
        if not engine:
            self.load_engine(hero)
        else:
            self.init(engine, hero)

    @property
    def current_actions(self):
        """List of ``(action, ev, traversed)`` tuples for the root's children."""
        return [(c.data['action'], c.data['ev'], c.data['traversed'])
                for c in self.tree.children(self.tree.root)]

    def is_time_left(self):
        """Whether the current run is still within its duration."""
        return time.time() - self.time_start < self.duration

    @retrace.retry(on_exception=(EOFError, KeyError), interval=0.1, limit=None)
    def load_engine(self, hero):
        """(Re)initialise from the shelved engine when its stored hash changed.

        NOTE(review): shelve unpickles the stored engine, so the file must
        come from a trusted source.
        """
        with shelve.open(Engine.FILE) as shlv:
            if shlv['hash'] != self.engine_checksum:
                self.engine_checksum = shlv['hash']
                self.init(shlv['engine'], hero)

    def init(self, engine, hero):
        """Bind ``engine``, pick the hero seat and create a fresh tree."""
        self.engine = engine
        # Default hero is the seat at the front of the engine queue.
        self.hero = hero or self.engine.q[0][0]
        self.hero_pocket = self.engine.data[self.hero]['hand']
        for s in self.engine.data:
            self.ev_history[s] = deque(maxlen=50)
        self.watched = False
        self.init_tree()

    def init_tree(self):
        """Create the tree with only a root; the first processing adds the first level of children."""
        self.tree = Tree()
        self.tree.create_node('root', identifier='root',
                              data={'traversed': 0, 'ev': 0, 'stats': 1, 'cum_stats': 1})

    def watch(self):
        """Poll the engine file forever and run simulations whenever hero is to act.

        NOTE(review): ``self.is_complete`` and ``self.timeout`` are read here
        but are not defined anywhere in this class as visible; they must be
        provided before calling this method.
        """
        while True:
            # loads new engine file if checksum changed
            self.load_engine(self.hero)

            # do not analyze if game finished
            if self.engine.phase in [self.engine.PHASE_SHOWDOWN, self.engine.PHASE_GG]:
                if not self.watched:
                    self.watched = True
                time.sleep(3)
                continue

            # do not analyze if hero does not have pocket
            if self.hero_pocket in [['__', '__'], [' ', ' ']]:
                if not self.watched:
                    self.watched = True
                time.sleep(0.5)
                continue

            # do not analyze if hero is not to play
            if self.hero != self.engine.q[0][0]:
                if not self.watched:
                    self.watched = True
                time.sleep(0.5)
                continue

            if self.is_complete:
                if not self.watched:
                    self.watched = True
                time.sleep(2)
                continue

            # run a few sims, each sprint slightly longer than the previous one
            self.run(self.timeout)
            self.timeout += 0.1

    def run(self, duration):
        """Process queued leaf paths for at most ``duration`` seconds.

        All current leaf paths are queued with priority ``1 - cum_stats`` (most
        probable first); nodes created while processing are queued as well by
        ``add_actions``. Does nothing when the engine is in showdown or GG.
        """
        total_traversions_start = sum(a[2] for a in self.current_actions)

        # cannot run if engine in showdown or gg
        if self.engine.phase in [self.engine.PHASE_SHOWDOWN, self.engine.PHASE_GG]:
            logger.warning('cannot run mc with no actions')
            return

        self.duration = duration
        self.time_start = time.time()
        self.queue = PriorityQueue()

        for leaf_path in self.tree.paths_to_leaves():
            node = self.tree[leaf_path[-1]]
            item = (
                1 - node.data['cum_stats'],
                leaf_path,
            )
            self.queue.put(item)

        while self.is_time_left() and not self.queue.empty():
            priority, self.leaf_path = self.queue.get_nowait()
            self.run_item(self.leaf_path)

        if self.queue.empty():
            logger.info('Everything was processed in queue!')

        total_traversions_end = sum(a[2] for a in self.current_actions)
        if total_traversions_end <= total_traversions_start:
            logger.warning(f'No new traversion added to {total_traversions_start}')

    def run_item(self, path):
        """Replay ``path`` on a deep copy of the engine and process its leaf."""
        e = deepcopy(self.engine)
        e.mc = True
        # To calculate the investment for the loss EV, the total amounts used
        # till end is required. Cannot use final player balance on engine as
        # that might have winnings allocated to it by the engine. Instead the
        # difference from all the new matched bets from the current matched
        # bets will be used. Need to add current contrib.
        e.matched_start = e.data[self.hero]['matched'] + e.data[self.hero]['contrib']
        self.fast_forward(e, path)

    def show_best_action(self):
        """Record the root action with the highest EV in ``self.convergence['deq']``.

        The entry is the action name, followed by ' with <amount>' for bet /
        raise / allin actions.

        NOTE(review): ``self.convergence`` is not created anywhere in this
        class as visible; it must exist before calling this method.
        """
        max_ev = float('-inf')
        action = None
        amount = None
        for nid in self.tree[self.tree.root].fpointer:
            dat = self.tree[nid].data
            if dat['ev'] > max_ev:
                max_ev = dat['ev']
                action = dat['action']
                # Reset the amount with every new best action so that a bet
                # size from an earlier candidate is not attached to e.g. a call.
                if action.startswith(('bet', 'raise', 'allin')):
                    amount = dat['amount']
                else:
                    amount = None

        best_action = '{}{}'.format(action, ' with {}'.format(amount) if amount else '')
        self.convergence['deq'].append(best_action)

    def fast_forward(self, e, path):
        """Replay the actions along ``path`` on engine ``e``, then process the leaf.

        A path holding only the root is processed directly. A leaf that was
        already traversed is skipped. After the leaf is processed, the nodes
        on the path are updated from the leaf back up the tree. Only the
        first letter of the action is sent to the engine.
        """
        if len(path) == 1:
            self.process_node(e, self.tree[path[0]])
            return

        leaf_node = self.tree[path[-1]]
        if leaf_node.data['traversed']:
            # can happen as all actions are added, but then one was chosen to continue on
            # and that path for that action wasn't removed from the queue
            return

        for nid in path[1:]:
            node = self.tree[nid]
            # available_actions() has to be called before every do()
            e.available_actions()
            cmd = [node.data['action'][0]]
            if 'amount' in node.data:
                cmd.append(node.data['amount'])
            e.do(cmd)

            if node.is_leaf():
                self.process_node(e, node)

                logger.info('nodes processed, now updating nodes that were fast forwarded')
                # ``path`` may have grown while processing (see most_probable_action).
                for processed_nid in reversed(path[1:]):
                    processed_node = self.tree[processed_nid]
                    self.update_node(processed_node)

        self.ev_history[self.engine.s].append(sum(a[1] for a in self.current_actions))

    def process_node(self, e, n):
        """Process node ``n`` on engine ``e`` and store the resulting EV in the tree.

        - A hero fold node gets the losses as EV.
        - Otherwise children are added when missing; a node that still has no
          children is a final game state and is resolved from the engine's
          winner or from showdown equities.
        - Otherwise the most probable untraversed child is played and
          processed recursively, then ``n`` is updated.
        """
        # this node is the hero folding (to prevent this being processed as leaf)
        # bug: cannot use engine.q as it already rotated after taking action getting here
        if not n.is_root() and n.data['action'] == 'fold' and self.hero == n.data['seat']:
            winnings, losses = self.net(e)
            n.data.update({
                'ev': losses,
                'traversed': 1,
            })
            return

        # add the children of the node
        if not n.fpointer:
            self.add_actions(e, n)

        # this node is a leaf (no more actions to take!)
        # either the game finished and we have winner and pot
        # or we have to use pokereval.winners
        if n.is_leaf():
            if e.winner:
                # winner given (easy resolution)
                winnings, losses = self.net(e)
                ev = winnings if self.hero in e.winner else losses
            elif 'in' not in e.data[self.hero]['status']:
                # hero fold is handled before in method
                # and thus for equities calc it is just 0
                ev = 0
            else:
                # winner unknown: weigh winnings and losses by hero's equity
                winnings, losses = self.net(e)
                equities = PE.showdown_equities(e)
                ev_pos = winnings * equities[self.hero]
                ev_neg = losses * (1 - equities[self.hero])
                ev = ev_pos + ev_neg
                logger.info('Net EV: {} from {} + {}'.format(ev, ev_pos, ev_neg))
            n.data.update({
                'ev': ev,
                'traversed': 1,
            })
            return

        # node is all good (not leaf (has children) and not hero folding)
        # get child actions and process most probable action
        a_node = self.most_probable_action(n)
        action = a_node.data['action']

        # if it is hero and he folds,
        # it is not necessarily an immediate ZERO equity
        # since my previous contrib needs to be added to the pot
        # i.e. make this a leaf node implicitly
        if action == 'fold' and self.hero == a_node.data['seat']:
            winnings, losses = self.net(e)
            a_node.data.update({
                'ev': losses,
                'traversed': 1,
            })
        else:
            # else we must process the node
            cmd = [action[0]]
            if 'amount' in a_node.data:
                cmd.append(a_node.data['amount'])
            e.do(cmd)
            self.process_node(e, a_node)

        # action node has been processed, now update node
        self.update_node(n)

    def update_node(self, node):
        """Recompute ``ev`` and ``traversed`` of ``node`` from its traversed children.

        Root and leaf nodes are left untouched. For a hero node the EV is the
        maximum child EV; for other seats it is the sum of child EVs weighted
        by ``stats / divider``. ``traversed`` becomes the sum of the
        children's counters. Raises ``Exception`` if no child was traversed.
        """
        is_hero = node.data.get('seat') == self.hero

        # it will traverse back up to the root; root can be skipped
        if node.is_root():
            return

        # fast forwarding will send here, just ignore node if leaf
        if node.is_leaf():
            return

        if not node.fpointer:
            raise Exception('not necessary to process leaves')

        n_ev = float('-inf') if is_hero else 0
        n_traversed = 0
        for child_nid in node.fpointer:
            dat = self.tree[child_nid].data
            if not dat['traversed']:
                continue

            if is_hero:
                # get max for hero
                n_ev = max(n_ev, dat['ev'])
            else:
                # probability-weighted sum for foe
                n_ev += dat['ev'] * dat['stats'] / dat['divider']

            n_traversed += dat['traversed']

        self.last_ev = node.data['ev'] - n_ev
        node.data.update({
            'ev': n_ev,
            'traversed': n_traversed,
        })

        if not node.data['traversed']:
            raise Exception('node cannot be untraversed')

    def net(self, e):
        """Return ``(winnings, losses)`` for hero on engine ``e``.

        Both are relative to ``e.matched_start`` (set in ``run_item``):
        losses are minus the amount matched since then, winnings are the pot
        less that amount.
        """
        e.gather_the_money()
        d = e.data[self.hero]
        matched_diff = d['matched'] - e.matched_start
        winnings = int(e.pot - matched_diff)
        losses = int(-matched_diff)
        return winnings, losses

    def most_probable_action(self, parent):
        """Return the untraversed child of ``parent`` with the highest ``stats``.

        The child's identifier is appended to ``self.leaf_path``. Raises
        ``MonteCarloError`` when every child was already traversed.
        """
        children = [c for c in self.tree.children(parent.identifier) if not c.data['traversed']]
        if not children:
            raise MonteCarloError('Cannot choose most probable action when all nodes are traversed')
        # max() keeps the first child on ties, like a stable descending sort.
        child = max(children, key=lambda c: c.data['stats'])
        self.leaf_path.append(child.identifier)
        return child

    def add_actions(self, e, parent):
        """Add one child node per action available on engine ``e`` below ``parent``.

        Nothing is added when there are no actions or the game is over ('gg').

        Filtering:
        - 'hand' is removed
        - 'fold' is removed when 'check' is available
        - 'raise' is removed when the player already bet or raised this phase

        Bets and raises:
        - preflop are 2x and 3x BB
        - postflop are 50% and 100% of the pot
        - sizes at or above the player's remaining balance are folded into allin

        The stats are then scaled to a probability distribution and every new
        node is queued with priority ``1 - cum_stats``.
        """
        actions = e.available_actions()
        s, p = e.q[0]
        d = e.data[s]
        balance_left = p['balance'] - d['contrib']

        if not actions:
            return

        if 'gg' in actions:
            return

        actions.remove('hand')

        # remove fold if player can check
        if 'check' in actions:
            actions.remove('fold')

        # remove raise if player has already been aggressive
        if 'raise' in actions and any(pa['action'] in 'br' for pa in d[e.phase]):
            actions.remove('raise')

        # remove allin, but add it later with final stats (if increased from bet/raised)
        if 'allin' in actions:
            actions.remove('allin')

        # load stats (codes with counts)
        stats = ES.player_stats(e, s)
        max_contrib = max(pd['contrib'] for pd in e.data.values())

        # allin needs to be the doc count
        # where bets and raises result in allin, add those prob dists to this
        # that will give proper probability
        go_allin = stats['actions'].get('a', 0)

        # ev 0 instead of none because of root node sum when not all traversed it gives error
        action_nodes = []
        for a in actions:
            node_data = {
                'stats': stats['actions'].get(ACTIONS_TO_ABBR[a], 0.01),
                'divider': 1,
                'action': a,
                'phase': e.phase,
                'seat': s,
                'name': p['name'],
                'traversed': 0,
                'ev': 0,
            }

            if a not in ['bet', 'raise']:
                action_nodes.append(node_data)
                continue

            btps_and_amts = []
            total_pot = sum(pd['contrib'] for pd in e.data.values()) + e.pot

            if e.phase == e.PHASE_PREFLOP:
                # for preflop only do 2x and 3x
                btps_and_amts.append(('double', e.bb_amt * 2))
                btps_and_amts.append(('triple', e.bb_amt * 3))
            else:
                # else do half and full pots
                btps_and_amts.append(('half_pot', total_pot * 0.50))
                btps_and_amts.append(('full_pot', total_pot * 1.00))

            betting_info = []
            amts_seen = []
            for btp, amt in btps_and_amts:
                if amt in amts_seen:
                    # duplicate amount
                    continue
                if a == 'bet' and amt < e.bb_amt:
                    # bet cannot be less than BB
                    continue
                if a == 'raise' and amt < (max_contrib * 2):
                    # raise cannot be less than 2x the highest contrib
                    continue
                betting_info.append((btp, amt))
                amts_seen.append(amt)

            # change raises that cause allin
            betting_info_final = []
            for btp, amt in betting_info:
                if amt >= balance_left:
                    # if amt is more than player balance, it is an allin
                    go_allin += node_data['stats'] / len(betting_info)
                else:
                    # all good, can have this bet as option
                    betting_info_final.append((btp, amt))

            for btp, amt in betting_info_final:
                node_data_copy = deepcopy(node_data)
                node_data_copy['divider'] = len(betting_info_final)
                node_data_copy['action'] = f'{a}_{btp}'
                node_data_copy['amount'] = amt
                action_nodes.append(node_data_copy)

        # allin will have doc counts (from stat, maybe from bets, maybe from raise)
        if go_allin:
            action_nodes.append({
                'stats': go_allin,
                'divider': 1,
                'action': 'allin',
                'phase': e.phase,
                'seat': s,
                'name': p['name'],
                'traversed': 0,
                'ev': 0,
                'amount': balance_left,
            })

        # scale the stats (it is currently term counts aka histogram) and it is required to be
        # a probability distribution (p~1)
        # Also, certain actions like fold can be removed, and the total stats is not 1
        total_stats = sum(an['stats'] / an['divider'] for an in action_nodes)
        for action_node in action_nodes:
            action_node['stats'] = max(0.01, action_node['stats'] / action_node['divider'] / total_stats)
            action_node['cum_stats'] = parent.data['cum_stats'] * action_node['stats']
            node_tag = f'{action_node["action"]}_{s}_{e.phase}'
            identifier = f'{node_tag}_{str(uuid.uuid4())[:8]}'
            self.tree.create_node(identifier=identifier, tag=node_tag,
                                  parent=parent.identifier, data=action_node)
            item = (
                1 - action_node['cum_stats'],
                self.leaf_path + [identifier]
            )
            self.queue.put(item)

    def analyze_tree(self):
        """Log the EV of every node on each path that ends in a traversed leaf."""
        for path in self.tree.paths_to_leaves():
            # skip untraversed end
            last_node = self.tree[path[-1]]
            if not last_node.data['traversed']:
                logger.debug('skipping untraversed endpoint {}'.format(last_node.tag))
                continue

            # show all actions
            for nid in path:
                node = self.tree[nid]
                logger.info('Node: {} ev={}'.format(node.tag, node.data['ev']))

    def get_showdown_equities(self, e):
        """Return normalised hand strengths per seat still 'in', instead of using pokereval.

        Hero's strength comes from ``PE.hand_strength``; the other seats use
        ``ES.showdown_hs`` at ``PERCENTILE``.
        """
        hss = {}
        for s, d in e.data.items():
            if 'in' in d['status']:
                hss[s] = ES.showdown_hs(e, s, percentile=self.PERCENTILE)

        # calculate for hero
        if self.hero in hss:
            d = e.data[self.hero]
            hss[self.hero] = PE.hand_strength(d['hand'], e.board, e.rivals)

        # normalize
        total = sum(hss.values())
        equities = {s: hs / total for s, hs in hss.items()}
        return equities
class RMQueue(object):
    """Tree of scheduler queues with recursive metric roll-ups.

    Every queue is a node of ``self.tree``; ``create_queue`` passes the queue
    name as both the node tag and the node identifier and attaches a
    ``QueueData`` payload as ``node.data``.  Most ``cal_*`` / ``clear_*``
    methods take an optional ``queue`` node and start from the root queue
    (the node named ``'root'``) when it is omitted, recursing over children.
    """
    __metaclass__ = Singleton

    def __init__(self):
        self.tree = Tree()
        # Maximum number of metric snapshots kept per queue (see add_metric).
        self.MAX_METRIC_COUNT = 12
        # Statistics interval in seconds (default: two hours).
        self.CAL_INTERVAL_IN_SECOND = 2 * 60 * 60
        self.conf = conf.Config("./conf/config.json")

    def set_stat_interval(self, interval):
        """Override the statistics interval (seconds)."""
        self.CAL_INTERVAL_IN_SECOND = interval

    def set_system_memory(self, size):
        """Store ``size`` (converted to float) as the root queue's absolute memory."""
        root = self.get_root()
        root.data.set_abs_memory(float(size))

    def create_queue(self, name=None, parent=None):
        """Add a queue called ``name`` under ``parent`` with a fresh ``QueueData``."""
        data = QueueData()
        self.tree.create_node(name, name, parent, data)

    def display(self):
        """Print the queue hierarchy."""
        self.tree.show()

    def display_score(self, queue=None, depth=0, table=None, printer=None):
        """Render the score table for ``queue`` and all of its descendants.

        Called without ``queue`` this is the top-level call: it builds the
        table, fills it recursively, then either prints it (``printer`` is
        None) or writes it to ``printer``.  Recursive calls only add rows.
        Values equal to 0 are shown as ``0``; others use three decimals.
        """
        is_top_call = False
        if queue is None:
            queue = self.get_root()
            is_top_call = True
            table = PrettyTable([
                "QUEUE", "PENDING AVG", "PENDING DIV", "MEMORY USAGE AVG(Q)",
                "MEMORY USAGE AVG(C)", "MEMORY USAGE DIV", "ABS CAPACITY"
            ])
        if table is not None:
            table.add_row([
                queue.tag,
                0 if queue.data.get_pending() == 0
                else "%.3f" % queue.data.get_pending(),
                0 if queue.data.get_pending_div() == 0
                else "%.3f" % queue.data.get_pending_div(),
                0 if queue.data.get_mem_usage() == 0
                else "%.3f" % queue.data.get_mem_usage(),
                0 if queue.data.cal_queue_memory_usage() == 0
                else "%.3f" % queue.data.cal_queue_memory_usage(),
                0 if queue.data.get_mem_usage_div() == 0
                else "%.3f" % queue.data.get_mem_usage_div(),
                str(0 if queue.data.get_abs_capacity() == 0
                    else "%.3f" % queue.data.get_abs_capacity()) + " %"
            ])
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                self.display_score(child, depth + 1, table)
        if is_top_call:
            if printer is None:
                print('------------' + utils.get_str_time() + ' SCORE ----------')
                # Function-call form: identical output under Python 2 and
                # valid syntax under Python 3 (was the statement `print table`).
                print(table)
            else:
                printer.write('\n------------' + utils.get_str_time() +
                              ' SCORE ----------\n')
                printer.write(str(table))

    def display_prediction(self, queue=None, depth=0, table=None, printer=None):
        """Render the desired-capacity table; same calling protocol as display_score."""
        is_top_call = False
        if queue is None:
            queue = self.get_root()
            is_top_call = True
            table = PrettyTable([
                "QUEUE", "DESIRED CAPACITY(Q)", "DESIRED CAPACITY(C)",
                "ABS CAPACITY"
            ])
        if table is not None:
            table.add_row([
                queue.tag,
                str(0 if queue.data.wish.capacity == 0
                    else "%.3f" % (100 * queue.data.wish.capacity)) + " %",
                0 if queue.data.wish.abs_capacity == 0
                else "%.3f" % queue.data.wish.abs_capacity,
                str(0 if queue.data.config.abs_capacity == 0
                    else "%.3f" % queue.data.config.abs_capacity) + " %"
            ])
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                self.display_prediction(child, depth + 1, table)
        if is_top_call:
            if printer is None:
                print('------------' + utils.get_str_time() +
                      ' PREDICTION ----------')
                print(table)
            else:
                printer.write('\n------------' + utils.get_str_time() +
                              ' PREDICTION ----------\n')
                printer.write(str(table))

    def write_score(self, path):
        """Append the score table to the file at ``path``."""
        FileOperator.touch(path)
        with open(path, 'a') as f:
            self.display_score(printer=f)

    def request_score(self, queue=None):
        """POST the score of ``queue`` and of every descendant as JSON.

        The target URL is ``conf.es_rest_address + conf.es_index + "score"``.
        """
        if queue is None:
            queue = self.get_root()
        postData = {
            'queue': queue.tag,
            'pending': queue.data.get_pending(),
            'pending_div': queue.data.get_pending_div(),
            'memory_usage': queue.data.get_mem_usage(),
            'memory_usage_div': queue.data.get_mem_usage_div(),
            'abs_capacity': queue.data.get_abs_capacity()
        }
        requests.post(str(self.conf.es_rest_address) +
                      str(self.conf.es_index) + "score",
                      data=json.dumps(postData))
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                self.request_score(child)

    def request_prediction(self, queue=None):
        """POST the desired capacities of ``queue`` and of every descendant as JSON.

        The target URL is ``conf.es_rest_address + conf.es_index + "prediction"``.
        """
        if queue is None:
            queue = self.get_root()
        postData = {
            'queue': queue.tag,
            'wish_capacity': queue.data.wish.capacity,
            'wish_abs_capacity': queue.data.wish.abs_capacity,
            'abs_capacity': queue.data.config.abs_capacity
        }
        requests.post(str(self.conf.es_rest_address) +
                      str(self.conf.es_index) + "prediction",
                      data=json.dumps(postData))
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                self.request_prediction(child)

    def write_prediction(self, path):
        """Append the prediction table to the file at ``path``."""
        FileOperator.touch(path)
        with open(path, 'a') as f:
            self.display_prediction(printer=f)

    def add_job(self, job, qname):
        """Attach ``job`` to leaf queue ``qname``; parent queues only log a message."""
        queue = self.tree.get_node(qname)
        if queue.is_leaf():
            queue.data.add_job(job)
        else:
            print("Cannot add jobs to parent queue", queue.tag, queue.identifier)

    def add_metric(self, qname):
        """Record the queue's current metric, keeping at most MAX_METRIC_COUNT entries."""
        queue = self.tree.get_node(qname)
        # The current metric lives on the payload (queue.data), as in every
        # other method of this class, not on the tree node itself.
        queue.data.add_metric(queue.data.cur_metric)
        # MAX_METRIC_COUNT is an instance attribute set in __init__; it is not
        # defined on the class, so it must be read through self.
        if len(queue.data.metrics) > self.MAX_METRIC_COUNT:
            del queue.data.metrics[0]

    def remove_queue(self, qname):
        """Remove the queue ``qname`` from the tree."""
        self.tree.remove_node(qname)

    def move_queue(self, src, dest):
        """Re-parent queue ``src`` under queue ``dest``."""
        self.tree.move_node(src, dest)

    def get_queue(self, qname):
        """Return the tree node for ``qname``."""
        return self.tree.get_node(qname)

    def get_root(self):
        """Return the node of the queue named ``'root'``."""
        return self.get_queue('root')

    def is_leaf(self, qname):
        """Return whether queue ``qname`` has no children."""
        queue = self.tree.get_node(qname)
        return queue.is_leaf()

    def cal_slowdown(self, queue=None):
        """Compute the average slowdown of ``queue`` and of its whole subtree.

        For a leaf the slowdown of a job is ``(wait_time + run_time) /
        run_time`` and the queue value is the mean over its jobs.  For a
        parent it is the job-count-weighted mean of the children's values.
        Job counts are stored on every visited queue.
        """
        if queue is None:
            queue = self.get_root()
        avg_slowdown = 0.0
        if queue.is_leaf():
            job_count = len(queue.data.jobs)
            for job in queue.data.jobs:
                slowdown = (job.wait_time + job.run_time) / job.run_time
                avg_slowdown += slowdown / job_count
            queue.data.set_job_count(job_count)
            queue.data.cur_metric.slowdown = avg_slowdown
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_slowdown(child)
            job_count = 0
            for child in children:
                job_count += child.data.get_job_count()
            queue.data.set_job_count(job_count)
            if job_count == 0:
                # No jobs anywhere below: avoid dividing by zero.
                queue.data.cur_metric.slowdown = avg_slowdown
                return avg_slowdown
            for child in children:
                avg_slowdown += (child.data.get_job_count() *
                                 child.data.get_slowdown() / job_count)
            queue.data.cur_metric.slowdown = avg_slowdown
        return queue.data.get_slowdown()

    def cal_pending(self, queue=None):
        """Compute the pending metric of ``queue`` and of its whole subtree.

        A leaf with recorded pendings gets their mean; a parent adds each
        child's value onto its own current value.
        """
        if queue is None:
            queue = self.get_root()
        if queue.is_leaf():
            if len(queue.data.pendings) > 0:
                queue.data.cur_metric.pending = np.mean(queue.data.pendings)
        else:
            for child in self.tree.children(queue.tag):
                self.cal_pending(child)
                # NOTE(review): this accumulates onto the existing value, so it
                # presumably relies on cur_metric being reset between rounds.
                queue.data.cur_metric.pending += child.data.get_pending()
        return queue.data.get_pending()

    def cal_pending_division(self, queue=None):
        """Store, per parent queue, the spread of its children's pending values.

        The spread is the square root of the mean squared distance between
        each child's pending value and ``parent pending / child count``.
        Leaves return 0.0 and are left untouched.
        """
        if queue is None:
            queue = self.get_root()
        division = 0.0
        if self.is_leaf(queue.tag):
            return division
        children = self.tree.children(queue.tag)
        for child in children:
            self.cal_pending_division(child)
        count = len(children)
        avg_pending = queue.data.get_pending() * 1.0 / count
        square_sum = 0.0
        for child in children:
            square_sum += np.square(child.data.get_pending() - avg_pending)
        division = np.sqrt(square_sum / count)
        queue.data.cur_metric.pending_div = division
        return division

    def cal_slowdown_division(self, queue=None):
        """Store, per parent queue, the spread of its children's slowdowns.

        Distances are measured against the parent's own slowdown value.
        Leaves return 0.0 and are left untouched.
        """
        if queue is None:
            queue = self.get_root()
        division = 0.0
        if self.is_leaf(queue.tag):
            return division
        children = self.tree.children(queue.tag)
        for child in children:
            self.cal_slowdown_division(child)
        square_sum = 0.0
        count = len(children)
        for child in children:
            square_sum += np.square(child.data.get_slowdown() -
                                    queue.data.get_slowdown())
        division = np.sqrt(square_sum / count)
        queue.data.cur_metric.slowdown_div = division
        return division

    def cal_memory_usage(self, queue=None):
        """Compute memory usage (percent of absolute capacity) for the subtree.

        A queue whose absolute capacity is 0 gets a usage of 0.0 instead of
        triggering a division by zero; this applies to leaves and parents
        alike.
        """
        if queue is None:
            queue = self.get_root()
        if queue.is_leaf():
            capacity = queue.data.get_abs_capacity()
            memory_usage = 0.0
            if capacity != 0:
                memory_usage = (100.0 * queue.data.cal_queue_memory_usage() /
                                capacity)
            queue.data.set_mem_usage(memory_usage)
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_memory_usage(child)
            abs_memory_usage = 0
            for child in children:
                abs_memory_usage += child.data.get_abs_memory_usage()
            capacity = queue.data.get_abs_capacity()
            memory_usage = 0.0
            if capacity != 0:
                memory_usage = 100.0 * abs_memory_usage / capacity
            queue.data.set_mem_usage(memory_usage)
        return queue.data.get_mem_usage()

    def cal_mem_usage_division(self, queue=None):
        """Store, per queue, the standard deviation of its children's memory usage.

        Leaves get 0.0.
        """
        if queue is None:
            queue = self.get_root()
        std_division = 0.0
        if self.is_leaf(queue.tag):
            queue.data.cur_metric.mem_usage_div = std_division
            return std_division
        children = self.tree.children(queue.tag)
        for child in children:
            self.cal_mem_usage_division(child)
        count = len(children)
        total_mem_usage = 0
        for child in children:
            total_mem_usage += child.data.get_mem_usage()
        avg_mem_usage = total_mem_usage / count
        square_sum = 0
        for child in children:
            square_sum += np.square(child.data.get_mem_usage() - avg_mem_usage)
        std_division = np.sqrt(square_sum / count)
        queue.data.cur_metric.mem_usage_div = std_division
        return std_division

    def cal_abs_capacity_bottom_up(self, queue=None):
        """Set every parent's absolute capacity to the sum of its children's."""
        if queue is None:
            queue = self.get_root()
        if self.is_leaf(queue.tag):
            return
        abs_capacity = 0.0
        for child in self.tree.children(queue.tag):
            self.cal_abs_capacity_bottom_up(child)
            abs_capacity += child.data.get_abs_capacity()
        queue.data.set_abs_capacity(abs_capacity)

    def cal_desired_abs_capacity_bottom_up(self, queue=None, delim=None):
        """Derive each queue's desired capacity from desired absolute capacities.

        ``delim`` is the divisor applied at the current level; it is 1 for the
        root and ``parent.config.abs_capacity * delim / 100`` for children.
        A leaf uses its own desired absolute capacity, a parent the sum of its
        children's.
        """
        if queue is None:
            queue = self.get_root()
            delim = 1
        if self.is_leaf(queue.tag):
            queue.data.wish.capacity = queue.data.wish.abs_capacity / delim
        else:
            abs_capacity = 0.0
            for child in self.tree.children(queue.tag):
                self.cal_desired_abs_capacity_bottom_up(
                    child, queue.data.config.abs_capacity * delim / 100)
                abs_capacity += child.data.wish.abs_capacity
            queue.data.wish.capacity = abs_capacity / delim

    def cal_abs_capacity_top_down(self, queue=None):
        """Propagate absolute capacity downwards; the root starts at 100.0.

        Each child receives ``parent absolute capacity * child capacity / 100``.
        """
        if queue is None:
            queue = self.get_root()
            queue.data.set_abs_capacity(100.0)
        if self.is_leaf(queue.tag):
            return
        for child in self.tree.children(queue.tag):
            child.data.set_abs_capacity(queue.data.get_abs_capacity() *
                                        child.data.get_capacity() / 100.0)
            self.cal_abs_capacity_top_down(child)

    def cal_desired_capacity_top_down(self, queue=None):
        """Turn desired absolute capacities into percentages of the parent's.

        The root's desired capacity is 100.0; a child of a parent whose
        desired absolute capacity is 0 gets 0.
        """
        if queue is None:
            queue = self.get_root()
            queue.data.wish.capacity = 100.0
        if self.is_leaf(queue.tag):
            return
        abs_capacity = queue.data.wish.abs_capacity
        for child in self.tree.children(queue.tag):
            # Seeded from the configured capacity, then overwritten below.
            child.data.wish.capacity = child.data.config.capacity
            if abs_capacity == 0:
                child.data.wish.capacity = 0
            else:
                child.data.wish.capacity = (child.data.wish.abs_capacity /
                                            abs_capacity * 100.0)
            self.cal_desired_capacity_top_down(child)

    def cal_capacity_top_down(self, queue=None):
        """Set each child's capacity to its share (percent) of the parent's absolute capacity."""
        if queue is None:
            queue = self.get_root()
        if self.is_leaf(queue.tag):
            return
        abs_capacity = queue.data.get_abs_capacity()
        for child in self.tree.children(queue.tag):
            if abs_capacity == 0:
                child.data.set_capacity(0)
            else:
                child.data.set_capacity(child.data.get_abs_capacity() /
                                        abs_capacity * 100)
            self.cal_capacity_top_down(child)

    def cal_abs_memory_top_down(self, queue=None):
        """Propagate absolute memory downwards in proportion to each child's capacity.

        On the top-level call the root first runs ``cal_totalMb_mean()``.
        """
        if queue is None:
            queue = self.get_root()
            queue.data.cal_totalMb_mean()
        if self.is_leaf(queue.tag):
            return
        for child in self.tree.children(queue.tag):
            child.data.set_abs_memory(queue.data.get_abs_memory() *
                                      child.data.get_capacity() / 100)
            self.cal_abs_memory_top_down(child)

    def clear_mus_top_down(self, queue=None):
        """Clear the recorded memory usage of every leaf queue in the subtree."""
        if queue is None:
            queue = self.get_root()
        if self.is_leaf(queue.tag):
            queue.data.clear_queue_memory_usage()
        else:
            for child in self.tree.children(queue.tag):
                self.clear_mus_top_down(child)

    def clear_jobs_top_down(self, queue=None):
        """Clear the jobs of every leaf queue in the subtree."""
        if queue is None:
            queue = self.get_root()
        if self.is_leaf(queue.tag):
            queue.data.clear_jobs()
        else:
            for child in self.tree.children(queue.tag):
                self.clear_jobs_top_down(child)

    def clear_pendings_top_down(self, queue=None):
        """Clear the pending samples of every leaf queue in the subtree."""
        if queue is None:
            queue = self.get_root()
        if self.is_leaf(queue.tag):
            queue.data.clear_pendings()
        else:
            for child in self.tree.children(queue.tag):
                self.clear_pendings_top_down(child)

    def score(self):
        """Run the enabled scoring passes over the whole tree, in order."""
        # self.cal_abs_capacity_bottom_up()
        # self.cal_capacity_top_down()
        # self.cal_abs_memory_top_down()
        # self.cal_slowdown()
        # self.cal_slowdown_division()
        self.cal_pending()
        self.cal_pending_division()
        self.cal_memory_usage()
        self.cal_mem_usage_division()
        # self.clear_jobs_top_down()
        # self.clear_pendings_top_down()
        # self.clear_mus_top_down()

    def predict(self):
        """Compute the desired capacities for the whole tree."""
        self.cal_desired_abs_capacity_bottom_up()
class PathList:
    """Local tree cache of a remote disk's directory listing.

    Nodes are keyed by file id with the file name as tag; the synthetic node
    ``'root'`` is the top of the tree.  Listings are fetched through
    ``disk.get_file_list(file_id)``.
    """

    # Layout of the 'created_at' / 'updated_at' timestamps in a listing entry.
    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    def __init__(self, disk):
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')
        # Default number of directory levels followed below the requested one.
        self.depth = 3

    def _build_file_info(self, item):
        """Convert one listing entry into a ``FileInfo`` (``type`` is True for files)."""
        common = dict(
            name=item['name'],
            id=item['file_id'],
            pid=item['parent_file_id'],
            ctime=time.strptime(item['created_at'], self._TIME_FORMAT),
            update_time=time.strptime(item['updated_at'], self._TIME_FORMAT),
            hidden=item['hidden'],
        )
        if item['type'] == 'file':
            return FileInfo(
                type=True,
                category=item['category'],
                content_type=item['content_type'],
                size=item['size'],
                content_hash_name=item['content_hash_name'],
                content_hash=item['content_hash'],
                download_url=(item['download_url']
                              if 'download_url' in item else ''),
                **common)
        return FileInfo(type=False, **common)

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        """Refresh the cached children of ``file_id`` from the disk.

        Arguments:
            file_id: id of the directory to list, or a path when ``is_fid``
                is False.
            depth: how many further directory levels to follow; ``None``
                means ``self.depth`` and 0 lists a single level.
            is_fid: whether ``file_id`` already is a file id.

        Returns:
            False when the disk returned an empty listing, True otherwise.
        """
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, update=False)
        file_list = self._disk.get_file_list(file_id)
        if not file_list:
            return False
        for item in file_list:
            file_info = self._build_file_info(item)
            if self._tree.get_node(file_info.id):
                # Refresh the entry itself.  Updating `file_id` here would
                # overwrite the parent directory's payload with its child's.
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name,
                                       identifier=file_info.id,
                                       data=file_info,
                                       parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root'):
        """Refresh and print the subtree rooted at ``path``.

        Raises:
            FileNotFoundError: ``path`` cannot be resolved in the cache.
        """
        file_id = self.get_path_fid(path, update=False)
        # Validate before touching the disk: an unresolved path yields False,
        # which must not be sent to the listing API.
        if not file_id:
            raise FileNotFoundError(path)
        self.update_path_list(file_id)
        self._tree.show(file_id)

    def get_path_list(self, path, update=True):
        """Return the ``FileInfo`` entries found at ``path`` (see get_fid_list)."""
        file_id = self.get_path_fid(path, update=update)
        return self.get_fid_list(file_id, update=update)

    def get_fid_list(self, file_id, update=True):
        """Return the ``FileInfo`` entries for ``file_id``.

        A file yields a one-element list holding its own entry; a directory
        yields the entries of its cached children.

        Raises:
            FileNotFoundError: ``file_id`` is falsy (unresolved path).
        """
        if not file_id:
            raise FileNotFoundError(file_id)
        self.auto_update_path_list(update, file_id)
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [node.data for node in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', update=True):
        """Resolve ``path`` to a file id by walking node tags from ``file_id``.

        A leading ``root`` component is treated as an alias of the root node.
        Returns ``'root'`` for root-like paths, the matching id otherwise, and
        False when some component has no matching child.
        """
        path = PurePosixPath(Path(path).as_posix())
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        components = list(filter(None, str(path).split('/')))
        if components and components[0] == 'root':
            components = components[1:]
        if not components:
            # e.g. '//' or '/root': nothing left below the root to look up.
            return 'root'
        for name in components:
            candidates = self._tree.children(file_id)
            if not candidates:
                self.auto_update_path_list(update, file_id)
                candidates = self._tree.children(file_id)
            for node in candidates:
                if name == node.tag:
                    file_id = node.identifier
                    break
            else:
                return False
        return file_id

    def get_path_node(self, path, update=True):
        """Return the tree node at ``path``, or False when it cannot be resolved."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, update=True):
        """Return the parent node of ``path``, or False when there is none."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, update=True, file_id=None):
        """Refresh the cache when needed.

        With ``update`` False and a ``file_id`` given, only that directory is
        listed (one level).  With ``update`` True a full refresh from the root
        runs, but only while the tree still holds nothing except the root.
        Returns the refresh result, or None when no refresh ran.
        """
        if not update and file_id:
            return self.update_path_list(file_id, depth=0)
        elif update and len(self._tree) == 1:
            return self.update_path_list()
class SAGG_BRIAC():
    """Region-based task sampler driven by competence progress.

    The task space is a box that is recursively split into sub-regions held
    in a tree.  Each region keeps two parallel deques, ``cps_gs[0]``
    (competences) and ``cps_gs[1]`` (tasks), and an ``interest`` value
    computed by ``compute_interest``.  ``update`` feeds one
    (task, competence) pair in; ``sample_task`` draws the next task.
    """

    def __init__(self, min, max, temperature=20):  # example --> min: [-1,-1] max: [1,1]
        assert len(min) == len(max)
        self.maxlen = 200
        self.window_cp = 200
        self.minlen = self.maxlen / 20
        self.maxregions = 80

        # init regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(min, max, dtype=np.float32)]
        self.interest = [0.]
        self.tree.create_node(
            'root', 'root',
            data=Region(maxlen=self.maxlen,
                        cps_gs=[deque(maxlen=self.maxlen + 1),
                                deque(maxlen=self.maxlen + 1)],
                        bounds=self.regions_bounds[-1],
                        interest=self.interest[-1]))
        self.nb_dims = len(min)
        self.temperature = temperature
        self.nb_split_attempts = 50
        self.max_difference = 0.2
        # asarray lets plain lists (as in the example above) work too;
        # ndarray inputs give the same result as `max - min`.
        self.init_size = np.asarray(max) - np.asarray(min)
        self.ndims = len(min)
        self.mode_3_noise = 0.1

        # Filled by update(); defined here so sample_task() and merge() never
        # hit a missing attribute when called before the first update.
        self.cps_gs = []
        self.nodes_to_split = []
        self.nodes_to_recompute = []

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_interests = []
        self.update_nb = 0
        self.split_iterations = []

    def compute_interest(self, sub_region):
        """Return the interest of a region given its ``[competences, tasks]`` pair.

        Looks at the last ``min(len, window_cp)`` competences and returns the
        absolute difference between the mean of the older part and the mean of
        the most recent half of that window.  Returns 0 when the region holds
        ``minlen`` competences or fewer.
        """
        if len(sub_region[0]) > self.minlen:  # TRICK NB 4
            cp_window = min(len(sub_region[0]), self.window_cp)  # not completely window
            half = int(cp_window / 2)
            competences = np.array(sub_region[0])
            first_half = competences[-cp_window:-half]
            snd_half = competences[-half:]
            cp = np.abs(first_half.mean() - snd_half.mean())
        else:
            cp = 0
        return np.abs(cp)

    def _trim_history(self, cps_gs):
        """Drop the first ``maxlen / 4`` entries of both deques of ``cps_gs`` in place.

        The replacement deques keep the ``maxlen + 1`` bound that every region
        deque is created with.
        """
        start = int(self.maxlen / 4)
        for k in (0, 1):
            cps_gs[k] = deque(islice(cps_gs[k], start, self.maxlen + 1),
                              maxlen=self.maxlen + 1)

    def split(self, nid):
        """Try to split region ``nid`` into two children; return whether it was split.

        ``nb_split_attempts`` random axis-aligned cuts are tried and the valid
        one with the highest score is kept.  When none is valid, the region's
        history is trimmed instead.
        """
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_abs_interest_diff = 0
        best_bounds = None
        best_sub_regions = None
        best_interest = None
        is_split = False
        for _ in range(self.nb_split_attempts):
            sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
            sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]

            # repeat until the two sub regions contain at least minlen of the
            # mother region TRICK NB 1
            while len(sub_reg1[0]) < self.minlen or len(sub_reg2[0]) < self.minlen:
                # decide on dimension
                dim = np.random.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]
                # to enforce not too small boxes TRICK NB 2
                valid_bounds = True
                if np.any(bounds1.high - bounds1.low < self.init_size / 15):
                    valid_bounds = False
                if np.any(bounds2.high - bounds2.low < self.init_size / 15):
                    valid_bounds = False

                # perform split in sub regions
                sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                for idx, task in enumerate(reg.cps_gs[1]):
                    target = sub_reg1 if bounds1.contains(task) else sub_reg2
                    target[1].append(task)
                    target[0].append(reg.cps_gs[0][idx])
                sub_regions = [sub_reg1, sub_reg2]

            # compute interest
            interest = [self.compute_interest(sub_reg1),
                        self.compute_interest(sub_reg2)]

            # compute score
            # NOTE(review): len(sub_reg1) and len(sub_reg2) are always 2 (each
            # is a [competences, tasks] pair), so the score only ranks by the
            # interest gap -- confirm whether sample counts were intended.
            split_score = len(sub_reg1) * len(sub_reg2) * np.abs(interest[0] - interest[1])
            if split_score >= best_split_score and valid_bounds:  # TRICK NB 3, max diff
                is_split = True
                best_abs_interest_diff = np.abs(interest[0] - interest[1])
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds
                best_interest = interest

        if is_split:
            if best_abs_interest_diff > self.max_difference:
                self.max_difference = best_abs_interest_diff
            # add new nodes to tree, using the interests computed for the
            # split that was kept (not those of the last attempt)
            for cps_gs, bounds, region_interest in zip(
                    best_sub_regions, best_bounds, best_interest):
                self.tree.create_node(
                    parent=nid,
                    data=Region(self.maxlen, cps_gs=cps_gs, bounds=bounds,
                                interest=region_interest))
        else:
            # TRICK NB 6, remove old stuff if can't find split
            assert len(reg.cps_gs[0]) == (self.maxlen + 1)
            self._trim_history(reg.cps_gs)
        return is_split

    def merge(self, all_nodes):
        """Collapse one pair of sibling leaves back into their parent.

        Among parents whose first two children are both leaves, the pair with
        the smallest absolute interest difference is removed.  Does nothing
        when no such parent exists.
        """
        candidates = []
        for n in all_nodes:
            if not n.is_leaf():  # if node is a parent
                children = self.tree.children(n.identifier)
                # both children must be leaves for an easy remove
                if children[0].is_leaf() and children[1].is_leaf():
                    candidates.append([n, children])  # [parent, [child1, child2]]
        if not candidates:
            return

        # pick the pair whose interests differ the least (first one on ties)
        parent, children = min(
            candidates,
            key=lambda x: np.abs(x[1][0].data.interest - x[1][1].data.interest))
        child1 = children[0]
        child2 = children[1]
        self.tree.remove_node(child1.identifier)
        self.tree.remove_node(child2.identifier)

        # remove 1/4 of parent to avoid falling in a splitting-merging loop
        self._trim_history(parent.data.cps_gs)
        self.nodes_to_recompute.append(parent.identifier)

        # remove children from the recompute list if they were touched when
        # adding the current task
        for child in (child1, child2):
            if child.identifier in self.nodes_to_recompute:
                self.nodes_to_recompute.remove(child.identifier)

    def add_task_comp(self, node, task, comp):
        """Record (task, comp) in ``node`` and in every descendant region containing ``task``.

        Touched nodes are queued for interest recomputation; a node whose
        ``Region.add`` reports that it needs a split is queued for splitting.
        """
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task):  # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children:  # if task in region, task is in one sub-region
                self.add_task_comp(n, task, comp)

            need_split = reg.add(task, comp, children == [])  # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)

    def update(self, task, continuous_competence, all_raw_rewards):
        """Feed one (task, competence) pair into the region tree.

        Splits at most one region, merges one pair of leaves when the node
        count exceeds ``maxregions``, recomputes the interest of touched
        regions and refreshes the flat ``regions_bounds`` / ``interest`` /
        ``cps_gs`` lists.  ``all_raw_rewards`` is not used.

        Returns:
            ``(new_split, None)`` where ``new_split`` tells whether a region
            was split during this call.
        """
        # add new (task, competence) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_comp(root, task, continuous_competence)
        assert len(self.nodes_to_split) <= 1

        # split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])
            if new_split:
                self.update_nb += 1
                # update list of regions_bounds
                all_nodes = self.tree.all_nodes()
                if len(all_nodes) > self.maxregions:  # too many regions, lets merge one of them
                    self.merge(all_nodes)
                    all_nodes = self.tree.all_nodes()
                self.regions_bounds = [n.data.bounds for n in all_nodes]

        # recompute interests of touched nodes
        for nid in self.nodes_to_recompute:
            reg = self.tree.get_node(nid).data
            reg.interest = self.compute_interest(reg.cps_gs)

        # collect new interests and new [comp, tasks] lists
        all_nodes = self.tree.all_nodes()
        self.interest = [n.data.interest for n in all_nodes]
        self.cps_gs = [n.data.cps_gs for n in all_nodes]

        # bk-keeping
        self.all_boxes.append(copy.copy(self.regions_bounds))
        self.all_interests.append(copy.copy(self.interest))
        self.split_iterations.append(self.update_nb)
        assert len(self.interest) == len(self.regions_bounds)

        return new_split, None

    def draw_random_task(self):
        """Sample a task from the first stored region bounds."""
        return self.regions_bounds[0].sample()  # first region is root region

    def sample_task(self, args):
        """Draw the next task, store it in ``sampled_tasks`` and return it.

        Three modes are chosen at random: 10% mutate the lowest-competence
        task of an interest-selected region, 20% draw a random task, 70% draw
        a random task inside an interest-selected region.  ``args`` is not
        used.
        """
        mode = np.random.rand()
        if mode < 0.1:
            # "mode 3" (10%) -> sample on regions and then mutate
            # lowest-performing task in region
            if len(self.sampled_tasks) == 0 or not self.cps_gs:
                # Nothing to mutate yet (no task sampled, or update() has not
                # provided any region history): fall back to a random task.
                self.sampled_tasks.append(self.draw_random_task())
            else:
                region_id = proportional_choice(self.interest, eps=0.0)
                worst_task_idx = np.argmin(self.cps_gs[region_id][0])
                # mutate task by a small amount (gaussian noise)
                task = np.random.normal(
                    self.cps_gs[region_id][1][worst_task_idx].copy(),
                    self.mode_3_noise)
                # clip to stay within region (add small epsilon to avoid
                # falling in multiple regions)
                task = np.clip(task,
                               self.regions_bounds[region_id].low + 1e-5,
                               self.regions_bounds[region_id].high - 1e-5)
                self.sampled_tasks.append(task)
        elif mode < 0.3:
            # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.draw_random_task())
        else:
            # "mode 1" (70%) -> sampling on regions and then random task in
            # selected region
            region_id = proportional_choice(self.interest, eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1]

    def dump(self, dump_dict):
        """Store the book-keeping histories in ``dump_dict`` and return it."""
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_interests'] = self.all_interests
        return dump_dict

    @property
    def nb_regions(self):
        """Number of stored region bounds."""
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        """The stored region bounds."""
        return self.regions_bounds
class RMQueue(metaclass=Singleton):
    """Tree of resource-manager queues with scoring and capacity prediction.

    Queues are stored as nodes of a ``Tree``; each node's ``data`` is a
    ``QueueData`` holding the configuration, the current metric and the
    desired ("wish") capacity of that queue. Nodes are created with the
    queue name as both tag and identifier, and the root queue is expected
    to be named ``'root'``.
    """

    MAX_METRIC_COUNT = 12
    CAL_INTERVAL_IN_SECOND = 2 * 60 * 60  # 2hours

    def __init__(self):
        self.tree = Tree()

    def set_stat_interval(self, interval):
        """Set the class-wide statistics interval, in seconds."""
        RMQueue.CAL_INTERVAL_IN_SECOND = interval

    def set_system_memory(self, size):
        """Store ``size`` (converted to float) as the root's absolute memory."""
        root = self.get_root()
        root.data.set_abs_memory(float(size))

    def create_queue(self, name=None, parent=None):
        """Create a queue called ``name`` below ``parent`` with fresh data."""
        data = QueueData()
        # Positional order: tag, identifier, parent, data.
        self.tree.create_node(name, name, parent, data)

    def display(self):
        """Print the queue tree."""
        self.tree.show()

    def display_score_old(self, queue=None, depth=0):
        """Print the slowdown / memory-usage score table, top down.

        Arguments:
            queue -- node to start from; the root (plus a header) if None.
            depth -- recursion depth, increased by 2 per level.
        """
        if queue is None:
            queue = self.get_root()
            print('------------' + utils.get_str_time() + ' SCORE ----------')
            print(24 * ' ' + ' SLOWDOWN MEMORY USAGE ')
            print('QUEUE NAME' + 16 * ' ' + ' AVG DIV AVG DIV')
        if depth >= 0:
            print(queue.tag + (22 - len(queue.tag)) * ' ' +
                  '%8.3f' % queue.data.get_slowdown(),
                  ' %8.3f ' % queue.data.get_slowdown_div(),
                  ' %8.3f' % queue.data.get_mem_usage(),
                  ' %8.3f' % queue.data.get_mem_usage_div())
        else:
            print('-' * depth + queue.tag,
                  '(slowdown: %.3f' % queue.data.get_slowdown(),
                  'div: %.3f)' % queue.data.get_slowdown_div(),
                  '(mem usage: %.3f' % queue.data.get_mem_usage(),
                  'div: %.3f)' % queue.data.get_mem_usage_div())
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                # Recurse into the slowdown variant so every row matches
                # the SLOWDOWN header printed above.
                self.display_score_old(child, depth + 2)

    def display_score(self, queue=None, depth=0):
        """Print the pending / memory-usage score table, top down.

        Arguments:
            queue -- node to start from; the root (plus a header) if None.
            depth -- recursion depth, increased by 2 per level.
        """
        if queue is None:
            queue = self.get_root()
            print('------------' + utils.get_str_time() + ' SCORE ----------')
            print(24 * ' ' + ' PENDING MEMORY USAGE ')
            print('QUEUE NAME' + 16 * ' ' + ' AVG DIV AVG DIV')
        if depth >= 0:
            print(queue.tag + (22 - len(queue.tag)) * ' ' +
                  '%8.3f' % queue.data.get_pending(),
                  ' %8.3f ' % queue.data.get_pending_div(),
                  ' %8.3f' % queue.data.get_mem_usage(),
                  ' %8.3f' % queue.data.get_mem_usage_div())
        else:
            print('-' * depth + queue.tag,
                  '(slowdown: %.3f' % queue.data.get_slowdown(),
                  'div: %.3f)' % queue.data.get_slowdown_div(),
                  '(mem usage: %.3f' % queue.data.get_mem_usage(),
                  'div: %.3f)' % queue.data.get_mem_usage_div())
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                self.display_score(child, depth + 2)

    def display_prediction(self, queue=None, depth=0):
        """Print the desired capacity of every queue, top down.

        Arguments:
            queue -- node to start from; the root (plus a header) if None.
            depth -- recursion depth, increased by 2 per level.
        """
        if queue is None:
            queue = self.get_root()
            print('------------' + utils.get_str_time() +
                  ' PREDICTION ----------')
            print('QUEUE NAME DESIRED CAPACITY')
        if depth >= 0:
            print(queue.tag + (22 - len(queue.tag)) * ' ',
                  ' %8.3f' % queue.data.wish.capacity)
        else:
            print('-' * depth + queue.tag,
                  'desired capacity: %.3f' % queue.data.wish.capacity)
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                self.display_prediction(child, depth + 2)

    def write_score(self, path):
        """Append the pending / memory-usage score table to the file ``path``."""
        with open(path, 'a') as f:
            self.write_score_top_down(output=f)

    def write_score_top_down_old(self, queue=None, depth=0, output=None):
        """Write the slowdown / memory-usage score table to ``output``.

        Arguments:
            queue -- node to start from; the root (plus a header) if None.
            depth -- recursion depth, increased by 2 per level.
            output -- writable object exposing ``writelines``.
        """
        if queue is None:
            queue = self.get_root()
            output.writelines(
                ('\n---------', utils.get_str_time(), ' SCORE ---------\n'))
            output.writelines(24 * ' ' + ' SLOWDOWN MEMORY USAGE\n')
            output.writelines('QUEUE NAME' + 16 * ' ' + ' AVG DIV AVG DIV\n')
        if depth >= 0:
            output.writelines(queue.tag + (22 - len(queue.tag)) * ' ' +
                              '%8.3f' % queue.data.get_slowdown() +
                              ' %8.3f ' % queue.data.get_slowdown_div() +
                              ' %8.3f' % queue.data.get_mem_usage() +
                              ' %8.3f' % queue.data.get_mem_usage_div() + '\n')
        else:
            output.writelines(
                ('-' * depth + queue.tag,
                 ' (slowdown: %.3f' % queue.data.get_slowdown(),
                 ' div: %.3f)' % queue.data.get_slowdown_div(),
                 ' (mem usage: %.3f' % queue.data.get_mem_usage(),
                 ' div: %.3f)' % queue.data.get_mem_usage_div(), '\n'))
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                # Recurse into the slowdown variant so every row matches
                # the SLOWDOWN header written above.
                self.write_score_top_down_old(child, depth + 2, output)

    def write_score_top_down(self, queue=None, depth=0, output=None):
        """Write the pending / memory-usage score table to ``output``.

        Arguments:
            queue -- node to start from; the root (plus a header) if None.
            depth -- recursion depth, increased by 2 per level.
            output -- writable object exposing ``writelines``.
        """
        if queue is None:
            queue = self.get_root()
            output.writelines(
                ('\n---------', utils.get_str_time(), ' SCORE ---------\n'))
            output.writelines(24 * ' ' + ' PENDING MEMORY USAGE\n')
            output.writelines('QUEUE NAME' + 16 * ' ' + ' AVG DIV AVG DIV\n')
        output.writelines(queue.tag + (22 - len(queue.tag)) * ' ' +
                          '%8.3f' % queue.data.get_pending() +
                          ' %8.3f ' % queue.data.get_pending_div() +
                          ' %8.3f' % queue.data.get_mem_usage() +
                          ' %8.3f' % queue.data.get_mem_usage_div() + '\n')
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                self.write_score_top_down(child, depth + 2, output)

    def write_prediction(self, path):
        """Append the desired-capacity table to the file ``path``."""
        with open(path, 'a') as f:
            self.write_prediction_top_down(output=f)

    def write_prediction_top_down(self, queue=None, depth=0, output=None):
        """Write the desired capacity of every queue to ``output``.

        Arguments:
            queue -- node to start from; the root (plus a header) if None.
            depth -- recursion depth, increased by 2 per level.
            output -- writable object exposing ``writelines``.
        """
        if queue is None:
            queue = self.get_root()
            output.writelines(('\n---------', utils.get_str_time(),
                               ' PREDICTION---------\n'))
            output.writelines('QUEUE NAME DESIRED CAPACITY\n')
        if depth >= 0:
            output.writelines(queue.tag + (22 - len(queue.tag)) * ' ' +
                              ' %8.3f' % queue.data.wish.capacity + '\n')
        else:
            output.writelines(
                ('-' * depth + queue.tag,
                 ' desired capacity: %.3f' % queue.data.wish.capacity, '\n'))
        if not self.is_leaf(queue.tag):
            for child in self.tree.children(queue.tag):
                self.write_prediction_top_down(child, depth + 2, output)

    def add_job(self, job, qname):
        """Add ``job`` to the leaf queue ``qname``; parents only get a notice."""
        queue = self.tree.get_node(qname)
        if queue.is_leaf():
            queue.data.add_job(job)
        else:
            print("Canot add jobs to parent queue", queue.tag,
                  queue.identifier)

    def add_metric(self, qname):
        """Record the current metric of ``qname`` in its bounded history.

        At most ``MAX_METRIC_COUNT`` metrics are kept; the oldest one is
        dropped once the limit is exceeded.
        """
        queue = self.tree.get_node(qname)
        # The metric lives on the node's payload, not on the node itself.
        queue.data.add_metric(queue.data.cur_metric)
        if len(queue.data.metrics) > RMQueue.MAX_METRIC_COUNT:
            del queue.data.metrics[0]

    def remove_queue(self, qname):
        """
        Remove a queue indicated by 'qname'; all the successors are
        removed as well.
        Return the number of removed nodes.
        """
        return self.tree.remove_node(qname)

    def move_queue(self, src, dest):
        """
        Move a queue indicated by @src parameter to be a child of
        @dest.
        """
        self.tree.move_node(src, dest)

    def get_queue(self, qname):
        """Return the tree node identified by ``qname``."""
        return self.tree.get_node(qname)

    def get_root(self):
        """Return the node identified by ``'root'``."""
        return self.get_queue('root')

    def is_leaf(self, qname):
        """Return whether the queue identified by ``qname`` is a leaf."""
        queue = self.tree.get_node(qname)
        return queue.is_leaf()

    def cal_slowdown(self, queue=None):
        """
        if current queue is a leaf queue:
            calculate the average slowdown of its jobs.
        else:
            calculate the average slowdown of its children;
            calculate the average slowdown of current queue as the
            job-count-weighted mean of its children's average slowdown.
        """
        if queue is None:
            queue = self.get_root()

        avg_slowdown = 0.0
        if queue.is_leaf():
            jobs = queue.data.jobs
            job_count = len(jobs)
            for job in jobs:
                slowdown = (job.wait_time + job.run_time) / job.run_time
                avg_slowdown += slowdown / job_count
            queue.data.set_job_count(job_count)
            queue.data.cur_metric.slowdown = avg_slowdown
        else:  # parent queue
            # First, update every child queue.
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_slowdown(child)

            # Second, get the job count.
            job_count = sum(child.data.get_job_count() for child in children)
            queue.data.set_job_count(job_count)

            # Finally, calculate the average slowdown of the queue.
            if job_count == 0:
                queue.data.cur_metric.slowdown = avg_slowdown
                return avg_slowdown

            for child in children:
                avg_slowdown += (child.data.get_job_count() *
                                 child.data.get_slowdown() / job_count)
            queue.data.cur_metric.slowdown = avg_slowdown
        return queue.data.get_slowdown()

    def cal_pending(self, queue=None):
        """
        if current queue is a leaf queue:
            calculate the pending value from its own pendings.
        else:
            calculate the pending of its children;
            add every child's pending to the current queue's pending.
        """
        if queue is None:
            queue = self.get_root()

        if queue.is_leaf():
            queue.data.cal_leaf_pending()
        else:  # parent queue
            # The parent's pending is accumulated, not assigned; it is
            # reset to 0.0 by clear_desired_abs_capacity().
            for child in self.tree.children(queue.tag):
                self.cal_pending(child)
                queue.data.cur_metric.pending += child.data.get_pending()
        return queue.data.get_pending()

    def cal_pending_division(self, queue=None):
        """
        if current queue is a leaf queue:
            the deviation is zero.
        else:
            calculate the standard deviation of its children;
            calculate the standard deviation of the children's pending
            around the current queue's pending divided by the child count.
        """
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return 0.0

        # parent queue
        children = self.tree.children(queue.tag)
        # First, update every child queue.
        for child in children:
            self.cal_pending_division(child)

        # Second, calculate the square sum of the deviations.
        count = len(children)
        avg_pending = queue.data.get_pending() * 1.0 / count
        square_sum = 0.0
        for child in children:
            square_sum += np.square(child.data.get_pending() - avg_pending)

        # Finally, calculate the standard deviation of the queue.
        division = np.sqrt(square_sum / count)
        queue.data.cur_metric.pending_div = division
        return division

    def cal_slowdown_division(self, queue=None):
        """
        if current queue is a leaf queue:
            the deviation is zero.
        else:
            calculate the standard deviation of its children;
            calculate the standard deviation of the children's slowdown
            around the current queue's slowdown.
        """
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return 0.0

        # parent queue
        children = self.tree.children(queue.tag)
        # First, update every child queue.
        for child in children:
            self.cal_slowdown_division(child)

        # Second, calculate the square sum of the deviations.
        square_sum = 0.0
        count = len(children)
        for child in children:
            square_sum += np.square(child.data.get_slowdown() -
                                    queue.data.get_slowdown())

        # Finally, calculate the standard deviation of the queue.
        division = np.sqrt(square_sum / count)
        queue.data.cur_metric.slowdown_div = division
        return division

    def cal_memory_usage_old(self, queue=None):
        """
        if current queue is a leaf queue:
            MemoryUsage is the (sum of job memorySeconds) /
            (self.absMemory * CAL_INTERVAL_IN_SECOND)
            Get absUsedMemory by self.memoryUsage * self.absMemory
        else:
            calculate the memory usage of its children;
            calculate the absolute used memory of the queue.
            MemoryUsage = absUsedMemory / absMemory
        """
        if queue is None:
            queue = self.get_root()

        if queue.is_leaf():
            total_memory_seconds = queue.data.cal_leaf_mem_second()
            total_memory_capacity = (queue.data.get_abs_memory() *
                                     RMQueue.CAL_INTERVAL_IN_SECOND)
            memory_usage = 1.0 * total_memory_seconds / total_memory_capacity
            queue.data.set_mem_usage(memory_usage)
            queue.data.cal_abs_used_memory()
        else:  # parent queue
            # First, update every child queue.
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_memory_usage_old(child)

            # Second, calculate the absUsedMemory of current queue.
            abs_used_memory = 0
            for child in children:
                abs_used_memory += child.data.get_abs_used_memory()
            queue.data.set_abs_used_memory(abs_used_memory)

            # Finally, calculate the memory usage of the queue.
            queue.data.set_mem_usage(1.0 * queue.data.get_abs_used_memory() /
                                     queue.data.get_abs_memory())
        return queue.data.get_mem_usage()

    def cal_memory_usage(self, queue=None):
        """
        if current queue is a leaf queue:
            MemoryUsage is the abs_memoryusage / self.absMemoryCapacity
        else:
            calculate the memory usage of its children;
            calculate the absolute memory usage of the queue.
            MemoryUsage = absMemoryUsage / absCapacity
        The stored usage is scaled by 100.
        """
        if queue is None:
            queue = self.get_root()

        if queue.is_leaf():
            abs_memory_usage = queue.data.cal_queue_memory_usage()
            abs_memory_capacity = queue.data.get_abs_capacity()
            memory_usage = 100.0 * abs_memory_usage / abs_memory_capacity
            queue.data.set_mem_usage(memory_usage)
            queue.data.set_abs_memory_usage(abs_memory_usage)
        else:  # parent queue
            # First, update every child queue.
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_memory_usage(child)

            # Second, calculate the absolute memory usage of current queue.
            abs_memory_usage = 0
            for child in children:
                abs_memory_usage += child.data.get_abs_memory_usage()
            queue.data.set_abs_memory_usage(abs_memory_usage)

            # Finally, calculate the memory usage of the queue.
            queue.data.set_mem_usage(100.0 *
                                     queue.data.get_abs_memory_usage() /
                                     queue.data.get_abs_capacity())
        return queue.data.get_mem_usage()

    def cal_mem_usage_division(self, queue=None):
        """
        if current queue is a leaf queue:
            memUsageDivision is zero.
        else:
            calculate the standard deviation of its children;
            calculate the standard deviation of the children's memory
            usage around their mean.
        """
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.cur_metric.mem_usage_div = 0.0
            return 0.0

        # parent queue
        # First, update every child queue.
        children = self.tree.children(queue.tag)
        for child in children:
            self.cal_mem_usage_division(child)

        # Second, calculate the average memory usage of all its children.
        count = len(children)
        total_mem_usage = 0
        for child in children:
            total_mem_usage += child.data.get_mem_usage()
        avg_mem_usage = total_mem_usage / count

        # Finally, calculate the standard deviation of the queue.
        square_sum = 0
        for child in children:
            square_sum += np.square(child.data.get_mem_usage() -
                                    avg_mem_usage)
        std_division = np.sqrt(square_sum / count)
        queue.data.cur_metric.mem_usage_div = std_division
        return std_division

    def cal_abs_capacity_bottom_up(self, queue=None):
        """Set every parent's absolute capacity to the sum of its children's."""
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return

        abs_capacity = 0
        for child in self.tree.children(queue.tag):
            self.cal_abs_capacity_bottom_up(child)
            abs_capacity += child.data.get_abs_capacity()
        queue.data.set_abs_capacity(abs_capacity)

    def cal_desired_abs_capacity_bottom_up(self, queue=None):
        """Aggregate the desired absolute capacity from the leaves upwards.

        Children flagged ``config.fixed`` keep their configured share; the
        desired absolute capacity of the remaining children is scaled up
        so that it represents the non-fixed share of the parent.
        """
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return

        children = self.tree.children(queue.tag)
        abs_capacity = 0.0
        fixed_capacity = 0.0
        for child in children:
            self.cal_desired_abs_capacity_bottom_up(child)
            if child.data.config.fixed:
                fixed_capacity += child.data.config.capacity
            else:
                abs_capacity += child.data.wish.abs_capacity

        # NOTE(review): raises ZeroDivisionError when the fixed children
        # sum to exactly 100 - confirm such a configuration cannot occur.
        for child in children:
            if child.data.config.fixed:
                child.data.wish.abs_capacity = abs_capacity / (
                    100.0 - fixed_capacity) * child.data.config.capacity
        queue.data.wish.abs_capacity = abs_capacity * 100.0 / (
            100.0 - fixed_capacity)

    def clear_desired_abs_capacity(self, queue=None):
        """Reset the desired absolute capacity of every queue to 0.

        Parent queues additionally get their accumulated current pending
        reset to 0.0.
        """
        if queue is None:
            queue = self.get_root()

        queue.data.wish.abs_capacity = 0
        if self.is_leaf(queue.tag):
            return

        queue.data.cur_metric.pending = 0.0
        for child in self.tree.children(queue.tag):
            self.clear_desired_abs_capacity(child)

    def cal_abs_capacity_top_down(self, queue=None):
        """
        This function calculate the abs capacity of each queue by its
        capacity. This function should only be called once at the start
        time.
        """
        if queue is None:
            queue = self.get_root()
            queue.data.set_abs_capacity(100.0)

        if self.is_leaf(queue.tag):
            return

        for child in self.tree.children(queue.tag):
            child.data.set_abs_capacity(queue.data.get_abs_capacity() *
                                        child.data.get_capacity() / 100.0)
            # Keep propagating absolute capacity downwards; calling
            # cal_capacity_top_down here would overwrite the configured
            # relative capacity of the grandchildren.
            self.cal_abs_capacity_top_down(child)

    def cal_desired_capacity_top_down(self, queue=None):
        """Derive each queue's desired relative capacity from its parent.

        The root gets 100.0; fixed children keep their configured
        capacity; other children get their share of the parent's desired
        absolute capacity (0 when that is 0).
        """
        if queue is None:
            queue = self.get_root()
            queue.data.wish.capacity = 100.0

        if self.is_leaf(queue.tag):
            return

        abs_capacity = queue.data.wish.abs_capacity
        for child in self.tree.children(queue.tag):
            if child.data.config.fixed:
                child.data.wish.capacity = child.data.config.capacity
            elif abs_capacity == 0:
                child.data.wish.capacity = 0
            else:
                child.data.wish.capacity = (child.data.wish.abs_capacity /
                                            abs_capacity * 100.0)
            self.cal_desired_capacity_top_down(child)

    def cal_capacity_top_down(self, queue=None):
        """Derive each child's relative capacity from absolute capacities."""
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return

        abs_capacity = queue.data.get_abs_capacity()
        for child in self.tree.children(queue.tag):
            if abs_capacity == 0:
                child.data.set_capacity(0)
            else:
                child.data.set_capacity(child.data.get_abs_capacity() /
                                        abs_capacity * 100)
            self.cal_capacity_top_down(child)

    def cal_abs_memory_top_down(self, queue=None):
        """Distribute absolute memory from the root down by relative capacity."""
        if queue is None:
            queue = self.get_root()
            # Root-only initialisation before the distribution starts.
            queue.data.cal_totalMb_mean()
            queue.data.clear_totalMb()

        if self.is_leaf(queue.tag):
            return

        for child in self.tree.children(queue.tag):
            child.data.set_abs_memory(queue.data.get_abs_memory() *
                                      child.data.get_capacity() / 100)
            self.cal_abs_memory_top_down(child)

    def clear_mus_top_down(self, queue=None):
        """Clear the recorded memory usage of every leaf queue."""
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.clear_queue_memory_usage()
        else:
            for child in self.tree.children(queue.tag):
                self.clear_mus_top_down(child)

    def clear_jobs_top_down(self, queue=None):
        """Clear the recorded jobs of every leaf queue."""
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.clear_jobs()
        else:
            for child in self.tree.children(queue.tag):
                self.clear_jobs_top_down(child)

    def clear_pendings_top_down(self, queue=None):
        """Clear the recorded pendings of every leaf queue."""
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.clear_pendings()
        else:
            for child in self.tree.children(queue.tag):
                self.clear_pendings_top_down(child)

    def before_scoring(self):
        """Refresh capacities and absolute memory before scoring."""
        self.cal_abs_capacity_bottom_up()
        self.cal_capacity_top_down()
        self.cal_abs_memory_top_down()

    def after_scoreing(self):
        """Drop the per-interval jobs, pendings and memory usages."""
        self.clear_jobs_top_down()
        self.clear_pendings_top_down()
        self.clear_mus_top_down()

    def score(self):
        """Compute the pending and memory-usage metrics of every queue."""
        self.before_scoring()
        self.cal_pending()
        self.cal_pending_division()
        self.cal_memory_usage()
        self.cal_mem_usage_division()
        self.after_scoreing()

    def before_predict(self):
        """Aggregate the desired absolute capacities before predicting."""
        self.cal_desired_abs_capacity_bottom_up()

    def after_predict(self):
        """Reset the desired absolute capacities after predicting."""
        self.clear_desired_abs_capacity()

    def predict(self):
        """Compute the desired relative capacity of every queue."""
        self.before_predict()
        self.cal_desired_capacity_top_down()
        self.after_predict()
def main():
    """
    Solution to Advent of Code Day 7

    Reads the bag rules from the file named by the first command-line
    argument (``puzzle_test.txt`` when none is given) and prints:

    * part #1 -- the sum of ``search_luggage`` over every rule for
      ``shinygold``, minus one;
    * part #2 -- the total number of bags inside one shiny gold bag.
    """
    filename = "puzzle_test.txt" if len(sys.argv) < 2 else sys.argv[1]
    # Close the file deterministically instead of leaking the handle.
    with open(filename) as puzzle_file:
        rules = puzzle_file.read().splitlines()

    # parse input file into a dictionary
    # bag name -> list of (bag count, bag name)
    rules = [
        re.sub(r"bags|bag|\.| |contain no other ", "", rule) for rule in rules
    ]
    luggage_dict = dict()
    for rule in rules:
        relations = rule.split('contain')
        # Rules for bags that hold nothing have no 'contain' left and are
        # skipped.
        if len(relations) > 1:
            members = relations[1].replace(',', ' ')
            data = re.findall(r"(\d+)(\w+)", members)
            luggage_dict[relations[0]] = data

    # count every tree that contains a shiny gold bag
    print(
        "part #1:",
        sum([(search_luggage(luggage_dict, key, "shinygold"))
             for key in luggage_dict]) - 1)

    # build tree of luggage containing shiny gold; every node's data starts
    # as the bag count relative to its direct parent
    tree = Tree()
    node = Node('shinygold', data=int(1))
    items = [node]
    tree.add_node(node, parent=None)
    while items:
        parent_node = items.pop()
        parent_id = parent_node.identifier
        if parent_node.tag not in luggage_dict:
            continue
        for data in luggage_dict[parent_node.tag]:
            node = Node(tag=data[1], data=int(data[0]))
            tree.add_node(node, parent=parent_id)
            items.append(node)

    # preorder traversal to distribute the luggage multiplier: a parent is
    # always popped before its children, so its data is already final when
    # it is multiplied into them
    stack = [tree.get_node(tree.root)]
    total = 0
    while stack:
        node = stack.pop()
        children = tree.children(node.identifier)
        for child_node in children:
            child_node.data *= node.data
        total += node.data
        stack += reversed(children)

    # produce bag total (the shiny gold bag itself does not count)
    print("part #2:", total - 1)