def share_any_edges(G: nx.Graph, t1: tl.Tree, t2: tl.Tree) -> bool:
    leaves1 = {t.tag for t in t1.leaves()}
    leaves2 = {t.tag for t in t2.leaves()}
    # if something in t1 has an edge to something in t2, return True
    for u, v in G.edges():
        if (u in leaves1 and v in leaves2) or (v in leaves1 and u in leaves2):
            return True
    # otherwise, return False
    return False
def endpoint_cal(swc_p, unit, sep=","):
    """Generate a multi-branch Tree from the swc file."""
    print(unit, sep)
    coords, labels, ids, pars = coords_get(swc_p, unit, sep)
    #coords += 1
    if len(coords) == 0:
        print("something is wrong with {}".format(swc_p))
        sys.exit(0)
    ftree = Tree()
    ftree.create_node(ids[0], ids[0], data=coords[0])
    for coord_, id_, par_ in zip(coords[1:], ids[1:], pars[1:]):
        #print(id_, par_)
        ftree.create_node(id_, id_, data=coord_, parent=par_)
    endpoint_coords = [x.data for x in ftree.leaves()]
    endpoint_coords.append(coords[0])
    branch_coords = [x.data for x in ftree.all_nodes() if len(ftree.children(x.tag)) >= 2]
    endpoint_coords = np.array(endpoint_coords)
    branch_coords = np.array(branch_coords)
    coords = np.array(coords)
    return endpoint_coords, branch_coords, coords, ftree
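# endpoint_cal above relies on a coords_get helper that is not included in this
# snippet. What follows is only a hypothetical sketch of such a reader, assuming
# the conventional 7-column SWC layout (id, type, x, y, z, radius, parent) and
# that `unit` rescales the coordinates; the original helper may differ in naming
# and behaviour.
def coords_get_sketch(swc_p, unit=1.0, sep=","):
    coords, labels, ids, pars = [], [], [], []
    with open(swc_p) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            fields = line.split(sep)
            ids.append(int(fields[0]))
            labels.append(int(fields[1]))
            coords.append([float(v) * unit for v in fields[2:5]])
            pars.append(int(float(fields[6])))
    return coords, labels, ids, pars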
def dasgupta_cost(G: nx.Graph, T: tl.Tree) -> float:
    cost = 0
    for edge in G.edges:
        lca = get_lca(T, edge[0], edge[1])
        subtree_leaves = T.leaves(lca)
        cost += len(subtree_leaves)
    return cost
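# dasgupta_cost above (and our_cost further below) call a get_lca helper that is
# not defined in this snippet. A minimal sketch of one possible implementation
# using treelib's rsearch (which yields a node's ancestors up to the root),
# assuming edge endpoints are used directly as node identifiers; the original
# helper may differ.
def get_lca_sketch(T: tl.Tree, nid_a, nid_b):
    ancestors_a = set(T.rsearch(nid_a))
    # the first ancestor of nid_b that is also an ancestor of nid_a is the LCA
    for nid in T.rsearch(nid_b):
        if nid in ancestors_a:
            return nid
    return T.root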
class EntityTree(object):
    def __init__(self):
        self.tree = Tree()

    def create_ent_tree(self, ent_dct, cls_name_map, parent=None, cls_data_map={}):
        '''Recursively build the tree from the hierarchy of the traversed labels.'''
        tag = ent_dct['LabelName']
        name = cls_name_map.get(tag)
        data = cls_data_map.get(tag, 0)
        if not name:
            print("Error: tag %s not found in cls_name_map!" % tag)
            sys.exit(-1)
        if DEBUG:
            print("# of tree nodes: %d, tree height: %d, # of leaves: %d"
                  % (len(self.tree), self.tree.depth(), len(self.tree.leaves())))
        nd = self.tree.create_node(tag=name, parent=parent, data=data)
        if 'Subcategory' in ent_dct.keys():
            for dct in ent_dct['Subcategory']:
                self.create_ent_tree(dct, cls_name_map,
                                     parent=nd.identifier, cls_data_map=cls_data_map)
def get_attribute(self, ting, attribute_name=None):
    counter = NodeCounter()
    task_tree = Tree()
    task_tree.create_node(identifier=counter.current_count, tag=ting.id, data=ting)
    self._build_tree(task_tree=task_tree, ting=ting, counter=counter)

    temp_leaves = task_tree.leaves()
    task_leaves = []
    requirements = {}
    for t in temp_leaves:
        leaf = t.data["task"]
        task_leaves.append(leaf)
        req = leaf[FRECKLET_KEY_NAME].get("resources", {})
        for k, v in req.items():
            if isinstance(v, string_types) or not isinstance(v, Sequence):
                v = [v]
            requirements.setdefault(k, []).extend(v)

    result = {
        "task_tree": task_tree,
        "task_leaves": task_leaves,
        "resources": requirements,
    }
    return MultiCacheResult(**result)
class Scansion(object):
    """
        .src : list of strings
    """
    #///////////////////////////////////////////////////////////////////////////
    def __init__(self, source_file):
        """
            Scansion.__init__

            source_file : (src) source file's name.
        """
        self.htree = Tree()
        self.src = []

        # creating root node (level 0) :
        self.htree.create_node(tag="root",
                               identifier="root",
                               data=Hypothesis(htree=self.htree,
                                               level=0,
                                               language=None,
                                               src=source_file))

        # calling root node :
        msg(0, "Calling the root node.")
        stop = False
        while not stop:
            leaves_to_be_extended = [leave for leave in self.htree.leaves()
                                     if not leave.data.dead]
            for leave in leaves_to_be_extended:
                leave.data.go_on()
            if len(leaves_to_be_extended) == 0:
                stop = True
def _build_tree(self, scores: ndarray, bin_edges: ndarray) -> Tree:
    # Build tree with specified number of children at each level
    tree = Tree()
    tree.add_node(Node())  # root node
    nodes_prev = [tree.get_node(tree.root)]
    for level in range(self.depth):
        nodes_current = []
        for node in nodes_prev:
            children = []
            for _ in range(self.n_children[level]):
                child = Node()
                tree.add_node(child, parent=node)
                children.append(child)
            nodes_current.extend(children)
        nodes_prev = nodes_current

    assignments = np.digitize(scores, bin_edges) - 1

    # Store instance ids in leaves
    leaves = tree.leaves()
    for k, node in enumerate(leaves):
        instance_ids = np.where(assignments == k)[0]
        if instance_ids.size == 0:
            tree.remove_node(node.identifier)
        else:
            node.data = instance_ids

    # Prune empty leaves
    check_for_empty_leaves = True
    while check_for_empty_leaves:
        check_for_empty_leaves = False
        leaves = tree.leaves()
        for node in leaves:
            if node.data is None and len(node.successors(tree.identifier)) == 0:
                # Node is empty and has no siblings
                tree.remove_node(node.identifier)
                check_for_empty_leaves = True

    # Simplify tree: remove nodes that only have one child
    for nid in tree.expand_tree(mode=tree.WIDTH):
        children = tree.children(nid)
        if len(children) == 1:
            tree.link_past_node(nid)

    return tree
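# The leaf assignment in _build_tree hinges on np.digitize: with bin_edges
# spanning the score range, np.digitize(scores, bin_edges) - 1 yields the
# 0-based index of the bin (and hence of the leaf, in creation order) that each
# score falls into. A small standalone illustration, separate from the class:
import numpy as np

scores = np.array([0.05, 0.4, 0.73, 0.99])
bin_edges = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
assignments = np.digitize(scores, bin_edges) - 1
print(assignments)  # [0 1 2 3]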
def map_tree_to_program(self, tree: Tree) -> str:
    self._node_to_subprog = {}
    frontier = []  # Tree nodes that are left to be explored
    for leaf in tree.leaves():
        span = leaf.data.span
        self._node_to_subprog[span] = self._node_to_type(leaf)
        parent = tree.parent(leaf.identifier)
        if parent and parent not in frontier:
            frontier.append(tree.parent(leaf.identifier))

    while frontier:
        node = frontier.pop()
        children = tree.children(node.identifier)
        assert len(children) == 2
        # check if children were already discovered
        if not all([child.data.span in self._node_to_subprog for child in children]):
            frontier.insert(0, node)
            continue
        child_1 = self._node_to_subprog[children[0].data.span]
        child_2 = self._node_to_subprog[children[1].data.span]
        try:
            if child_1 and not child_2:  # child_2 == 'NO_LABEL'
                self._node_to_subprog[node.data.span] = child_1
            elif not child_1 and child_2:  # child_1 == 'NO_LABEL'
                self._node_to_subprog[node.data.span] = child_2
            elif not child_1 and not child_2:
                # Both children are assigned with 'NO_LABEL':
                # ignore children and propagate parent
                self._node_to_subprog[node.data.span] = self._node_to_type(node)
            else:
                assert child_2.is_full()  # make sure child_2 value can be formed
                self._node_to_subprog[node.data.span] = child_1.apply(child_2)
        except Exception as e:
            try:
                self._node_to_subprog[node.data.span] = child_2.apply(child_1)
            except Exception as e:
                raise Exception('final apply_exception: {}'.format(e))

        parent = tree.parent(node.identifier)
        if parent and parent not in frontier:
            frontier.insert(0, parent)

    # return the root's value
    inner_program = self._node_to_subprog[tree.get_node(tree.root).data.span].get_value()
    return inner_program
def our_cost(G: nx.Graph, T: tl.Tree) -> float:
    T_leaves = [n.tag for n in T.leaves()]
    cost = 0
    for edge in G.edges:
        # only look at edges in this tree.
        if edge[0] in T_leaves and edge[1] in T_leaves:
            lca = get_lca(T, edge[0], edge[1])
            subtree = T.subtree(lca)
            subtree_leaves = subtree.leaves()
            for leaf in subtree_leaves:
                cost += subtree.level(leaf.identifier)
    return cost
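# A hedged usage sketch for the two cost functions above, on a toy graph and a
# matching treelib hierarchy; it assumes leaf tags double as identifiers and
# that a get_lca helper such as the sketch above is available.
import networkx as nx
import treelib as tl

G = nx.Graph()
G.add_edges_from([("a", "b"), ("b", "c"), ("a", "c"), ("c", "d")])

T = tl.Tree()
T.create_node("root", "root")
T.create_node("left", "left", parent="root")
T.create_node("right", "right", parent="root")
for leaf, parent in [("a", "left"), ("b", "left"), ("c", "right"), ("d", "right")]:
    T.create_node(leaf, leaf, parent=parent)

print(dasgupta_cost(G, T))  # sums |leaves(lca(u, v))| over the graph's edges
print(our_cost(G, T))       # sums leaf depths inside each edge's lca subtree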
class Chain:
    def __init__(self):
        self.root = Tree()
        self.root.create_node(0, 0)  # Genesis block

    # Adds a block to the chain
    def add_block(self, block):
        node = self.root.create_node(block.epoch, hash(block), block.hash)
        node.data = block

    # Graphically prints the chain as a tree
    def print_chain(self):
        self.root.show()

    # Returns the blocks of the whole blockchain, as a list
    def return_nodes_data(self):
        nodes = self.root.all_nodes()
        nodes_data = []
        for i in range(1, len(nodes)):
            nodes_data.append(nodes[i].data)
        if len(nodes_data) == 0:
            nodes_data.append(0)
        return nodes_data

    # Returns the list of identifiers (numbers) of the leaves of the tree (chain)
    def leaves(self):
        leaves = self.root.leaves(nid=None)
        leaves_identifiers = []
        for i in range(len(leaves)):
            leaves_identifiers.append(leaves[i].identifier)
        return leaves_identifiers

    # Returns all the epochs associated with the nodes of the blockchain
    '''
    def return_nodes(self):
        nodes = self.root.all_nodes()
        nodes_identifiers = []
        for i in range(len(nodes)):
            nodes_identifiers.append(nodes[i].tag)
        return nodes_identifiers
    '''

    # Returns the block (object) with the highest epoch in the Chain
    def block_max_epoch(self):
        nodes = self.root.all_nodes()
        max_epoch = 0
        block_max = 0
        for i in range(len(nodes)):
            if nodes[i].tag > max_epoch:
                max_epoch = nodes[i].tag
                block_max = nodes[i].data
        return block_max
def build_parser_tree(org_str, parse_str):
    level_dict = defaultdict(list)
    punctuation = "',.!?[]()%#@&1234567890"
    parse_tree = Tree()
    parse_list = parse_str.replace("(", " ( ").replace(")", " ) ").strip().split()
    #print(parse_list)
    #print(org_str)
    org_str_list = nltk.word_tokenize(org_str)
    left_bracket_counter = 0
    right_bracket_counter = 0
    level = 0
    for index, item in enumerate(parse_list):
        if item == "(":
            left_bracket_counter = left_bracket_counter + 1
            continue
        if item == ")":
            right_bracket_counter = right_bracket_counter + 1
            continue
        level = left_bracket_counter - right_bracket_counter
        try:
            if (item in ConstituencyParse and item not in punctuation) \
                    or (item not in org_str and item.isupper()):
                # Create a non-leaf node, e.g. ROOT, S, VP, NP
                if item == "ROOT":
                    parse_tree.create_node(item, str(index))
                    level_dict[str(level)].append(str(index))
                else:
                    parse_tree.create_node(item, str(index),
                                           parent=level_dict[str(level-1)][-1])
                    level_dict[str(level)].append(str(index))
            if item in org_str_list and item not in punctuation:
                # Create a leaf node; each leaf is one word of the sentence
                parse_tree.create_node(item, str(index),
                                       parent=level_dict[str(level)][-1])
                level_dict[str(level+1)].append(str(index))
            elif item in punctuation:
                # Create the nodes for punctuation marks
                if index > 0 and parse_list[index-1] == "(":
                    parse_tree.create_node(item, str(index),
                                           parent=level_dict[str(level-1)][-1])
                    level_dict[str(level)].append(str(index))
                else:
                    parse_tree.create_node(item, str(index),
                                           parent=level_dict[str(level)][-1])
                    level_dict[str(level+1)].append(str(index))
        except Exception as e:
            #print(str(e))
            return None, parse_list
    #parse_tree.show()
    # If a leaf node still carries a POS tag, tree construction went wrong
    for node in parse_tree.leaves():
        if node.tag in ConstituencyParse:
            print("Failed to build the constituency parse tree!")
            return None, parse_list
    return parse_tree, parse_list
def generate(tree: Tree, parent: Node, token: Token, depth: int):
    node = tree.create_node(parent=parent, data=token)
    if isinstance(token, Terminal):
        return
    valid_rules = list(filter(lambda rule: checkRuleDepth(rule, depth),
                              rule_table.getAllRules(token)))
    # print(token, valid_rules)
    rule = random.choice(valid_rules)
    list(map(lambda next_token: generate(tree, node, next_token, depth - 1), rule.rhs))
    node.data.text = ' '.join(
        list(map(lambda leaf: leaf.data.name, tree.leaves(node.identifier))))
def map_tree_to_program(self, tree: Tree) -> str:
    self._node_to_subprog = {}
    frontier = []  # Tree nodes that are left to be explored
    for leaf in tree.leaves():
        span = leaf.data.span
        self._node_to_subprog[span] = self._node_to_type(leaf)
        parent = tree.parent(leaf.identifier)
        if parent and parent not in frontier:
            frontier.append(tree.parent(leaf.identifier))

    while frontier:
        node = frontier.pop()
        children = tree.children(node.identifier)
        assert len(children) in [2, 3]
        # check if children were already discovered
        if not all([child.data.span in self._node_to_subprog for child in children]):
            frontier.insert(0, node)
            continue
        if len(children) == 2:
            child_1 = self._node_to_subprog[children[0].data.span]
            child_2 = self._node_to_subprog[children[1].data.span]
            self._node_to_subprog[node.data.span] = self.merge_children(child_1, child_2, node)
        else:
            children.sort(key=lambda c: c.data.span[0])
            child_1 = self._node_to_subprog[children[0].data.span]
            child_2 = self._node_to_subprog[children[1].data.span]
            child_3 = self._node_to_subprog[children[2].data.span]
            intermediate = self.merge_children(child_1, child_3, node)
            self._node_to_subprog[node.data.span] = self.merge_children(child_2, intermediate, node)

        parent = tree.parent(node.identifier)
        if parent and parent not in frontier:
            frontier.insert(0, parent)

    # return the root's value
    inner_program = self._node_to_subprog[tree.get_node(tree.root).data.span].get_value()
    return 'answer ( {} )'.format(inner_program)
class BlockChain:
    def __init__(self):
        self.root = Tree()
        self.root.create_node(0, 0)  # Genesis block

    # Adds a block to the blockchain
    def add_block(self, block):
        node = self.root.create_node(block.epoch, hash(block), block.hash)
        node.data = block

    # Prints the blockchain graphically
    def print_chain(self):
        self.root.show()

    # Returns the list containing all the blocks of the entire blockchain
    def return_nodes_data(self):
        nodes = self.root.all_nodes()
        nodes_data = []
        for i in range(1, len(nodes)):
            nodes_data.append(nodes[i].data)
        if len(nodes_data) == 0:
            nodes_data.append(0)
        return nodes_data

    # Returns the list of identifiers of the leaves of the tree (blockchain)
    def leaves(self):
        leaves = self.root.leaves(nid=None)
        leaves_identifiers = []
        for i in range(len(leaves)):
            leaves_identifiers.append(leaves[i].identifier)
        return leaves_identifiers

    # Returns the Block object with the highest epoch in the blockchain
    def block_max_epoch(self):
        nodes = self.root.all_nodes()
        max_epoch = 0
        block_max = 0
        for i in range(len(nodes)):
            if nodes[i].tag > max_epoch:
                max_epoch = nodes[i].tag
                block_max = nodes[i].data
        return block_max
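# A small usage sketch for the chain classes above, with a hypothetical Block
# carrying the epoch and parent-hash fields that add_block expects; the real
# Block class is not shown in this snippet.
from dataclasses import dataclass

@dataclass(frozen=True)  # frozen, so hash(block) is well defined as add_block requires
class Block:
    epoch: int
    hash: int  # identifier of the parent node already in the tree

chain = BlockChain()

b1 = Block(epoch=1, hash=0)         # attaches under the genesis node (identifier 0)
chain.add_block(b1)
b2 = Block(epoch=2, hash=hash(b1))  # attaches under b1
chain.add_block(b2)

chain.print_chain()
print(chain.block_max_epoch())  # -> b2, the tip with the highest epoch
print(chain.leaves())           # identifiers of the current chain tips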
def get_invoke_tree(self, method: MethodId, search_depth=3):
    tree = Tree(deep=search_depth, identifier=method.address)
    # Parent method with invoke address list
    tree.create_node(identifier=method, data=[])

    for _ in range(search_depth):
        for leaf in tree.leaves():
            uppers = self.apkinfo.find_upper_methods(leaf.identifier)
            for offset, upper in uppers:
                bytecode = self.apkinfo.find_bytecode_by_addr(upper.dexindex, offset)
                if not tree.contains(upper):
                    tree.create_node(identifier=upper, data=[bytecode], parent=leaf)
                else:
                    tree.get_node(upper).data.append(bytecode)

    return tree
def generatePool(N, maxDepth, tree, rule_table, debug=False):
    # Collecting all non-terminal nodes
    nodeIDs = []
    for nodeID in tree.expand_tree(mode=Tree.DEPTH):
        nodeType = tree[nodeID].data
        if isinstance(nodeType, PiRL.DataStructures.Token.NonTerminal):
            nodeIDs.append(nodeID)

    neighbours = []
    for _ in range(N):
        # Making a deep copy
        newTree = Tree(tree=tree, deep=True)
        # Selecting a random non-terminal to be replaced
        nodeToReplaceID = random.choice(nodeIDs)
        # Generating a new subtree
        newSubTree = getProgTree(newTree[nodeToReplaceID].data,
                                 maxDepth - newTree.depth(nodeToReplaceID))
        # Replacing subtree
        try:
            newTree.replace_node(newTree[nodeToReplaceID].predecessor(newTree.identifier),
                                 nodeToReplaceID, newSubTree, deep=False)
        except Exception as e:
            # traceback.print_exc()
            if debug:
                print("Root node replaced")
            newTree = newSubTree
        if debug:
            print("Generated neighbour:", end=' ')
            # newTree.show(data_property='str')
            for leaf in newTree.leaves():
                print(leaf.data.name, end=' ')
            print("\n")
        neighbours.append(newTree)
    return neighbours
def swap(tree):
    internalNodes = [n for n in tree.all_nodes_itr() if n.var != None]
    if len(internalNodes) == 1:
        return tree
    internalNodes.remove(tree[0])
    cNode = random.choice(internalNodes)
    tagc = (cNode.identifier, cNode.var, cNode.split)
    pid = cNode.bpointer
    tree1 = Tree(tree, deep=True)
    sub = tree1.remove_subtree(pid)
    tags = recurTag(sub, pid)
    tagp = tags[0]
    tags[tags.index(tagc)] = (tagc[0], tagp[1], tagp[2])
    tags[0] = (tagp[0], tagc[1], tagc[2])
    string = f'{mi} swap {t}: {tags[0]}; '
    try:
        sub1 = genTree(tree[pid], tags)
    except IndexError:
        print(string + 'unswappable')
        return tree
    #rTransit = 1
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rStruct
    print(string + f'{r.round(4)}')
    if random.uniform(0, 1) < r:
        if pid > 0:
            gpid = tree[pid].bpointer
            tree1.paste(gpid, sub1)
            tree1[gpid].fpointer = sorted(tree1[gpid].fpointer)
        else:
            tree1 = sub1
        tree1.w2 = tree.w2
        tree1.R = tree.R
        tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
        tree1.show()
        return tree1
    return tree
    for bagIn in bagList:
        for i in range(rules[bag][bagIn]):
            cnt += 1
            t.create_node(bagIn, cnt, parent=nid)
    return cnt


# Part A: find all bags the shiny_gold bag could hide inside
count = 0
e = {'shiny_gold'}
while len(e) > count:
    count = len(e)
    e = willCarry(rules, e)

# Part B: build tree with bags, incrementing id counter for each
t = Tree()
t.create_node('shiny_gold', 0)
cnt = 0
cntLast = -1
while (cnt > cntLast):
    print("Node Count {}".format(cnt))
    cntLast = cnt
    for node in t.leaves():
        cnt = addBagSet(rules, node.tag, t, node.identifier, cnt)

# Display tree
#t.show()

print("The solution to part A is {0:d}".format(len(e) - 1))
print("The solution to part B is {0:d}".format(len(t.nodes) - 1))
def hierarchy(fna_mapping, dist): pending = list(fna_mapping.keys()) node_id = max(pending) + 1 mapping = bidict.bidict() cls_dist = [] cls_dist_temp = {} index = 0 pending.sort() for i in pending: mapping[i] = index index += 1 for i in range(0, len(pending)): temp1 = [] for j in range(0, i): temp1.append(cls_dist_temp[(mapping[pending[j]], mapping[pending[i]])]) for j in range(i, len(pending)): temp = cal_cls_dist(dist, fna_mapping[pending[i]], fna_mapping[pending[j]]) temp1.append(temp) cls_dist_temp[(mapping[pending[i]], mapping[pending[j]])] = temp cls_dist.append([np.array(temp1)]) cls_dist = np.concatenate(cls_dist) cls_dist_recls = cls_dist.copy() mapping_recls = mapping.copy() tree_relationship = {} pending = set(pending) while (len(pending) > 1): (child_a, child_b) = divmod(np.argmax(cls_dist), cls_dist.shape[1]) temp1 = [ np.concatenate([[cls_dist[child_a]], [cls_dist[child_b]]]).max(axis=0) ] cls_dist = np.concatenate([cls_dist, temp1], axis=0) temp1 = np.append(temp1, -1) temp1 = np.vstack(temp1) cls_dist = np.concatenate([cls_dist, temp1], axis=1) cls_dist = np.delete(cls_dist, [child_a, child_b], axis=0) cls_dist = np.delete(cls_dist, [child_a, child_b], axis=1) # change mapping cluster_a = mapping.inv[child_a] cluster_b = mapping.inv[child_b] # cluster id tree_relationship[node_id] = (cluster_a, cluster_b) del mapping[cluster_a], mapping[cluster_b] pending.remove(cluster_a) pending.remove(cluster_b) pending = sorted(list(pending)) for i in pending: if (mapping[i] > min([child_a, child_b]) and mapping[i] < max([child_a, child_b])): mapping[i] -= 1 elif (mapping[i] > max([child_a, child_b])): mapping[i] -= 2 mapping[node_id] = len(cls_dist) - 1 pending = set(pending) pending.add(node_id) node_id += 1 # build tree structure pending = list(pending) tree = Tree() T0 = Node(identifier=pending[0]) tree.add_node(T0) while (len(pending) > 0): parent = pending[0] for i in tree_relationship[parent]: tree.add_node(Node(identifier=i), parent=parent) if (i in tree_relationship): pending.append(i) pending.remove(parent) # load depth info depths = {} depths_mapping = defaultdict(set) leaves = set(tree.leaves()) for i in tree.all_nodes(): depths[i] = tree.depth(node=i) if (i in leaves): depths_mapping[depths[i]].add(i) return cls_dist_recls, mapping_recls, tree, depths, depths_mapping
def build_tree(arg): # read parameters start = time.time() dist_matrix_file = arg[0] cls_file = arg[1] tree_dir = arg[2] ksize = arg[3] params = arg[4] alpha_ratio = params[0] minsize = params[1] maxsize = params[2] max_cls_size = params[3] # save genomes info fna_seq = bidict.bidict() # : 1 fna_path = {} # read dist matrix (represented by similarity: 1-dist) # output: dist, fna_path, fna_seq f = open(dist_matrix_file, "r") lines = f.readlines() f.close() index = 0 d = lines[0].rstrip().split("\t")[1:] bac_label = 0 for i in lines[0].rstrip().split("\t")[1:]: temp = i[i.rfind('/') + 1:].split(".")[0] fna_seq[temp] = index fna_path[index] = i index += 1 dist = [] for line in lines[1:]: dist.append( [np.array(list(map(float, line.rstrip().split("\t")[1:])))]) dist = np.concatenate(dist) # read initial clustering results. fna_mapping, from 1 for indexing f = open(cls_file, 'r') lines = f.readlines() f.close() fna_mapping = defaultdict(set) for line in lines: temp = line.rstrip().split("\t") for i in temp[2].split(","): fna_mapping[int(temp[0])].add(fna_seq[i]) if (len(lines) == 1): tree = Tree() kmer_sta = defaultdict(int) T0 = Node(identifier=list(fna_mapping.keys())[0]) tree.add_node(T0) kmer_sta = defaultdict(int) kmer_index_dict = bidict.bidict() kmer_index = 1 alpha_ratio = 1 Lv = set() for i in fna_mapping[T0.identifier]: for seq_record in SeqIO.parse(fna_path[i], "fasta"): temp = str(seq_record.seq) for k in range(0, len(temp) - ksize): forward = temp[k:k + ksize] reverse = seqpy.revcomp(forward) for kmer in [forward, reverse]: try: kmer_sta[kmer_index_dict[kmer]] += 1 except KeyError: kmer_index_dict[kmer] = kmer_index kmer_sta[kmer_index] += 1 kmer_index += 1 alpha = len(fna_mapping[T0.identifier]) * alpha_ratio for x in kmer_sta: if (kmer_sta[x] >= alpha): Lv.add(x) print(T0.identifier, len(Lv)) # save2file kmerlist = set() pkl.dump(tree, open(tree_dir + '/tree.pkl', 'wb')) f = open(tree_dir + "/tree_structure.txt", "w") os.system("mkdir " + tree_dir + "/kmers") os.system("mkdir " + tree_dir + "/overlapping_info") f.write("%d\t" % T0.identifier) f.close() os.system(f'cp {cls_file} {tree_dir}/') f = open(tree_dir + "/reconstructed_nodes.txt", "w") f.close() if (len(Lv) > maxsize): Lv = set(random.sample(Lv, maxsize)) kmerlist = Lv length = len(Lv) f = open(tree_dir + "/kmers/" + str(T0.identifier), "w") for j in Lv: f.write("%d " % j) f.close() f = open(tree_dir + "/node_length.txt", "w") f.write("%d\t%d\n" % (T0.identifier, length)) kmer_mapping = {} index = 0 f = open(tree_dir + "/kmer.fa", "w") for i in kmerlist: f.write(">1\n") f.write(kmer_index_dict.inv[i]) kmer_mapping[i] = index index += 1 f.write("\n") f.close() # change index files = os.listdir(tree_dir + "/kmers") for i in files: f = open(tree_dir + "/kmers/" + i, "r") lines = f.readlines() if (len(lines) == 0): continue d = lines[0].rstrip().split(" ") d = map(int, d) f = open(tree_dir + "/kmers/" + i, "w") for j in d: f.write("%d " % kmer_mapping[j]) f.close() end = time.time() print( '- The total running time of tree-based indexing struture building is ', str(end - start), ' s\n') return # initially build tree cls_dist, mapping, tree, depths, depths_mapping = hierarchy( fna_mapping, dist) # initially extract k-mers kmer_index_dict = bidict.bidict() kmer_index = 1 Lv = defaultdict(set) spec = defaultdict(set) # k-mers <= alpha leaves = tree.leaves() for i in leaves: kmer_index = extract_kmers(fna_mapping[i.identifier], fna_path, ksize, kmer_index_dict, kmer_index, Lv, spec, tree_dir, alpha_ratio, i.identifier) end = 
time.time() print('- The total running time of k-mer extraction is ', str(end - start), ' s\n') start = time.time() # leaf nodes check recls_label = 0 leaves_check = [] check_waitlist = reversed(leaves) while (True): if (recls_label): cls_dist, mapping, tree, depths, depths_mapping = hierarchy( fna_mapping, dist) leaves = tree.leaves() temp = {} temp2 = [] for i in check_waitlist: if (i in fna_mapping): temp2.append(i) check_waitlist = temp2.copy() for i in check_waitlist: temp[tree.get_node(i)] = depths[tree.get_node(i)] check_waitlist = [] a = sorted(temp.items(), key=lambda x: x[1], reverse=True) for i in a: check_waitlist.append(i[0]) for i in fna_mapping: if (i not in Lv): kmer_index = extract_kmers(fna_mapping[i], fna_path, ksize, kmer_index_dict, kmer_index, Lv, spec, tree_dir, alpha_ratio, i) higher_union = defaultdict(set) for i in check_waitlist: diff, diff_nodes = get_leaf_union(depths[i], higher_union, depths_mapping, Lv, spec, i) kmer_t = Lv[i.identifier] - diff for j in diff_nodes: kmer_t = kmer_t - Lv[j.identifier] for j in diff_nodes: kmer_t = kmer_t - spec[j.identifier] print(str(i.identifier) + " checking", end="\t") print(len(kmer_t)) if (len(kmer_t) < minsize): leaves_check.append(i) if (len(leaves_check) > 0): recls_label = 1 else: break # re-clustering check_waitlist = [] while (recls_label == 1): cluster_id = max(list(fna_mapping.keys())) + 1 check_waitlist.append(cluster_id) leaf_a = leaves_check[0].identifier row_index = mapping[leaf_a] column_index = cls_dist[row_index].argmax() leaf_b = mapping.inv[column_index] # (leaf_a, leaf_b) temp2 = fna_mapping[leaf_a] | fna_mapping[leaf_b] print(cluster_id, leaf_a, leaf_b, temp2) del fna_mapping[leaf_a], fna_mapping[leaf_b] if (leaf_a in Lv): del Lv[leaf_a], spec[leaf_a] if (leaf_b in Lv): del Lv[leaf_b], spec[leaf_b] del leaves_check[0] if (tree.get_node(leaf_b) in leaves_check): leaves_check.remove(tree.get_node(leaf_b)) temp1 = [ np.concatenate([[cls_dist[row_index]], [cls_dist[column_index]]]).max(axis=0) ] cls_dist = np.concatenate([cls_dist, temp1], axis=0) temp1 = np.append(temp1, -1) temp1 = np.vstack(temp1) cls_dist = np.concatenate([cls_dist, temp1], axis=1) cls_dist = np.delete(cls_dist, [row_index, column_index], axis=0) cls_dist = np.delete(cls_dist, [row_index, column_index], axis=1) # change mapping del mapping[leaf_a], mapping[leaf_b] pending = list(fna_mapping.keys()) pending.sort() for i in pending: if (mapping[i] > min([row_index, column_index]) and mapping[i] < max([row_index, column_index])): mapping[i] -= 1 elif (mapping[i] > max([row_index, column_index])): mapping[i] -= 2 fna_mapping[cluster_id] = temp2 mapping[cluster_id] = len(cls_dist) - 1 if (len(leaves_check) == 0): break del higher_union # rebuild identifiers all_nodes = tree.all_nodes() all_leaves_id = set([]) leaves = set(tree.leaves()) for i in leaves: all_leaves_id.add(i.identifier) id_mapping = bidict.bidict() index = 1 index_internal = len(leaves) + 1 for i in all_nodes: if (recls_label == 0): id_mapping[i.identifier] = i.identifier elif (i in leaves): id_mapping[i.identifier] = index index += 1 else: id_mapping[i.identifier] = index_internal index_internal += 1 leaves_identifier = list(range(1, len(leaves) + 1)) all_identifier = list(id_mapping.values()) all_identifier.sort() # save2file f = open(tree_dir + "/tree_structure.txt", "w") os.system("mkdir " + tree_dir + "/kmers") os.system("mkdir " + tree_dir + "/overlapping_info") for nn in all_identifier: i = id_mapping.inv[nn] f.write("%d\t" % id_mapping[i]) if (i == 
all_nodes[0].identifier): f.write("N\t") else: f.write("%d\t" % id_mapping[tree.parent(i).identifier]) if (nn in leaves_identifier): f.write("N\t") else: [child_a, child_b] = tree.children(i) f.write("%d %d\t" % (id_mapping[child_a.identifier], id_mapping[child_b.identifier])) if (len(fna_mapping[i]) == 1): temp = list(fna_mapping[i])[0] temp = fna_seq.inv[temp] f.write("%s" % temp) f.write("\n") f.close() f = open(tree_dir + "/hclsMap_95_recls.txt", "w") for nn in leaves_identifier: i = id_mapping.inv[nn] f.write("%d\t%d\t" % (nn, len(fna_mapping[i]))) temp1 = list(fna_mapping[i]) for j in temp1: temp = fna_seq.inv[j] if (j == temp1[-1]): f.write("%s\n" % temp) else: f.write("%s," % temp) f.close() end = time.time() print('- The total running time of re-clustering is ', str(end - start), ' s\n') start = time.time() # build indexing structure kmerlist = set([]) # all kmers used length = {} overload_label = 0 if (len(tree.leaves()) > max_cls_size): overload_label = 1 # from bottom to top (unique k-mers) uniq_temp = defaultdict(set) rebuilt_nodes = [] descendant = defaultdict(set) # including itself ancestor = defaultdict(set) descendant_leaves = defaultdict(set) ancestor[all_nodes[0].identifier].add(all_nodes[0].identifier) for i in all_nodes[1:]: ancestor[i.identifier] = ancestor[tree.parent( i.identifier).identifier].copy() ancestor[i.identifier].add(i.identifier) for i in reversed(all_nodes): print(str(id_mapping[i.identifier]) + " k-mer removing...") if (i in leaves): uniq_temp[i.identifier] = Lv[i.identifier] descendant_leaves[i.identifier].add(i.identifier) else: (child_a, child_b) = tree.children(i.identifier) descendant[i.identifier] = descendant[ child_a.identifier] | descendant[child_b.identifier] descendant_leaves[i.identifier] = descendant_leaves[ child_a.identifier] | descendant_leaves[child_b.identifier] uniq_temp[i.identifier] = uniq_temp[ child_a.identifier] & uniq_temp[child_b.identifier] uniq_temp[child_a.identifier] = uniq_temp[ child_a.identifier] - uniq_temp[i.identifier] uniq_temp[child_b.identifier] = uniq_temp[ child_b.identifier] - uniq_temp[i.identifier] descendant[i.identifier].add(i.identifier) all_nodes_id = set(id_mapping.keys()) # remove overlapping for i in reversed(all_nodes): print(str(id_mapping[i.identifier]) + " k-mer set building...") # no difference with sibling, subtree and ancestors if (i == all_nodes[0]): kmer_t = uniq_temp[i.identifier] else: diff = {} temp = all_nodes_id - descendant[i.identifier] - set([ tree.siblings(i.identifier)[0].identifier ]) - ancestor[i.identifier] for j in temp: diff[j] = len(uniq_temp[j]) a = sorted(diff.items(), key=lambda x: x[1], reverse=True) kmer_t = uniq_temp[i.identifier] for j in a: k = j[0] kmer_t = kmer_t - uniq_temp[k] # remove special k-mers temp = all_leaves_id - descendant_leaves[i.identifier] diff = {} for j in temp: diff[j] = len(spec[j]) a = sorted(diff.items(), key=lambda x: x[1], reverse=True) for j in a: k = j[0] kmer_t = kmer_t - spec[k] if (len(kmer_t) < minsize and overload_label == 0): rebuilt_nodes.append(i) print("%d waiting for reconstruction..." 
% id_mapping[i.identifier]) else: if (len(kmer_t) > maxsize): kmer_t = set(random.sample(kmer_t, maxsize)) f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w") for j in kmer_t: f.write("%d " % j) f.close() length[i] = len(kmer_t) kmerlist = kmerlist | kmer_t del uniq_temp # rebuild nodes overlapping = defaultdict(dict) intersection = defaultdict(set) higher_union = defaultdict(set) del_label = {} for i in leaves: del_label[i.identifier] = [0, 0] for i in rebuilt_nodes: print(str(id_mapping[i.identifier]) + " k-mer set rebuilding...") kmer_t = get_intersect(intersection, descendant_leaves[i.identifier], Lv, del_label, i.identifier) diff = get_diff(higher_union, descendant_leaves, depths, all_nodes, i, Lv, spec, del_label) for j in diff: kmer_t = kmer_t - j lower_leaves = set([]) for j in leaves: if (depths[j] < depths[i]): lower_leaves.add(j) if (len(kmer_t) > maxsize): kmer_overlapping_sta = defaultdict(int) for j in lower_leaves: kmer_o = Lv[j.identifier] & kmer_t for k in kmer_o: kmer_overlapping_sta[k] += 1 temp = sorted(kmer_overlapping_sta.items(), key=lambda kv: (kv[1], kv[0])) kmer_t = set([]) for j in range(0, maxsize): kmer_t.add(temp[j][0]) nkmer = {} f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w") index = 0 for j in kmer_t: f.write("%d " % j) nkmer[j] = index index += 1 length[i] = len(kmer_t) kmerlist = kmerlist | kmer_t # save overlapping info for j in lower_leaves: temp = Lv[j.identifier] & kmer_t if (len(temp) > 0): ii = id_mapping[i.identifier] jj = id_mapping[j.identifier] overlapping[jj][ii] = set([]) for k in temp: overlapping[jj][ii].add(nkmer[k]) delete(Lv, spec, del_label) for i in overlapping: f = open(tree_dir + "/overlapping_info/" + str(i), "w") f1 = open(tree_dir + "/overlapping_info/" + str(i) + "_supple", "w") count = -1 for j in overlapping[i]: if (len(overlapping[i]) != 0): f.write("%d\n" % j) for k in overlapping[i][j]: f.write("%d " % k) f.write("\n") count += 2 f1.write("%d %d\n" % (j, count)) f.close() f1.close() # final saving f = open(tree_dir + "/reconstructed_nodes.txt", "w") for i in rebuilt_nodes: f.write("%d\n" % id_mapping[i.identifier]) f.close() f = open(tree_dir + "/node_length.txt", "w") for nn in all_identifier: i = id_mapping.inv[nn] f.write("%d\t%d\n" % (nn, length[tree[i]])) f.close() kmer_mapping = {} index = 0 f = open(tree_dir + "/kmer.fa", "w") for i in kmerlist: f.write(">1\n") f.write(kmer_index_dict.inv[i]) kmer_mapping[i] = index index += 1 f.write("\n") f.close() # change index files = os.listdir(tree_dir + "/kmers") for i in files: f = open(tree_dir + "/kmers/" + i, "r") lines = f.readlines() if (len(lines) == 0): continue d = lines[0].rstrip().split(" ") d = map(int, d) f = open(tree_dir + "/kmers/" + i, "w") for j in d: f.write("%d " % kmer_mapping[j]) f.close() end = time.time() print( '- The total running time of tree-based indexing struture building is ', str(end - start), ' s\n')
class GradientBoostingTree(object): def __init__(self, X, y, k=4, epochs=200, loss='mse', metrics=['mae'], learning_rate=0.001, layers=3, ending_units=256, optimizers=[Adam, Nadam, RMSprop], early_stop=None, seed=None): self.X = X self.y = y self.k = k self.epochs = epochs self.loss = loss self.metrics = metrics self.learning_rate = learning_rate self.layers = layers self.ending_units = ending_units self.optimizers = optimizers self.early_stop = early_stop self.generator = secrets.SystemRandom(seed) self.models = [] self.tree = Tree() def fit(self): y = self.y preds = np.zeros(len(y)) def fit_leaf(preds, y, i): optimizer = self.optimizers[self.generator.randint( 0, len(self.optimizers) - 1)](learning_rate=self.learning_rate) model = network_builder(layers=self.layers, ending_units=self.ending_units) model.compile(loss=self.loss, optimizer=optimizer, metrics=self.metrics) model.fit(self.X, y, epochs=self.epochs * (i + 1), callbacks=[self.early_stop] if self.early_stop else []) new_preds = preds + model.predict(self.X).reshape(-1) new_y = self.y - new_preds return new_preds, new_y, model def fit_tree(k, preds, y, i, parent=None): if k > 0: l_preds, l_y, l_model = fit_leaf(preds, y, i) self.tree.add_node(Node(identifier=parent + 'l' + str(k), data=l_model), parent=parent) fit_tree(k - 1, l_preds, l_y, i + 1, parent + 'l' + str(k)) r_preds, r_y, r_model = fit_leaf(preds, y, i) self.tree.add_node(Node(identifier=parent + 'r' + str(k), data=r_model), parent=parent) fit_tree(k - 1, r_preds, r_y, i + 1, parent + 'r' + str(k)) i = 0 preds, y, root_model = fit_leaf(preds, y, i) self.tree.add_node(Node(identifier='root', data=root_model)) fit_tree(self.k, preds, y, i + 1, parent='root') def predict(self, X): preds = [] leafs = self.tree.leaves('root') for path in self.tree.paths_to_leaves(): p = .0 for id in path: p += self.tree.get_node(id).data.predict(X) preds.append(p) return sum(preds) / len(leafs)
def qmaxrtc(q, leaf_set, triplets): # first we need to construct a full binary tree # assume q=2^k-1 for k>1. So we want to construct a complete binary tree of depth # k-1 and this will yield a tree of size q depth_tree = int(math.log(q + 1, 2)) - 1 node_ids = [ -(i + 1) for i in range(0, q) ] # ensure our labeling set is negative to be disjoint with leaf_set new_tr = Tree() new_tr.create_node("n0", 0) create_binary_tree(new_tr, 0, node_ids, depth_tree, 0) tree_leaf_set = [n.identifier for n in new_tr.leaves()] num_pairs_diff_subtree = {} num_pairs_same_tree = {} num_pairs_diff_subtree[0] = 0 num_pairs_same_tree[0] = 0 computer_diff_pairs_subtree(new_tr, 0, 0, num_pairs_diff_subtree, num_pairs_same_tree, q) # our goal is to asssign each leaf in leaf_set to an element of tree_leaf_set assignments = {} for leaf in leaf_set: assignments[leaf] = [] prev = (1 / 3 - 4 / 3 * (q + 1) * (q + 1)) * len(triplets) # assign each leaf for leaf in leaf_set: leaf_triplets = [] # to compute all triplets xy|z that l is a part of for t in triplets: if t == leaf: for f_set in triplets[t]: leaf_triplets.append((f_set, leaf)) else: for f_set in triplets[t]: for elm in f_set: if elm == leaf: leaf_triplets.append((f_set, t)) vals = {} # initialization # vals[potential_parent] will be E[W|assignments, leaf assigned to potential_parent] at the end for potential_parent in tree_leaf_set: vals[potential_parent] = prev for (f_set, z) in leaf_triplets: x, y = f_set # consider every possible assignment of leaf to a node in tree_leaf_set # base case is leaf is unassaigned assignments[leaf] = [] for potential_parent in tree_leaf_set: prob = prq(assignments[x], assignments[y], assignments[z], new_tr, len(tree_leaf_set), q, num_pairs_diff_subtree, num_pairs_same_tree) # print("prob") # print(prob) vals[potential_parent] -= prob for potential_parent in tree_leaf_set: assignments[leaf] = [potential_parent] prob = prq(assignments[x], assignments[y], assignments[z], new_tr, len(tree_leaf_set), q, num_pairs_diff_subtree, num_pairs_same_tree) # print("prob2") # print(prob) vals[potential_parent] += prob # now we want to compute max expectation over all possible # assignments of leaf to a parent max_val = -1 max_parent = 0 for potential_parent in tree_leaf_set: # print(vals[potential_parent]) if vals[potential_parent] > max_val: max_val = vals[potential_parent] max_parent = potential_parent assignments[leaf] = [max_parent] prev = max_val # now we want to assign every leaf to its respective parent for leaf in leaf_set: parent_leaf = assignments[leaf][0] # print("assignemnt") # print(parent_leaf) new_tr.create_node("l" + str(leaf), leaf, parent=parent_leaf) return new_tr
def fov_connect(fov_ins_array): def parent(edges, i): coords = np.where( edges == i ) edge = edges[ coords[0][0] ] if edge[0] == i: return edge[1] + 1 return edge[0] + 1 skels = kimimaro.skeletonize( fov_ins_array, teasar_params={ 'scale': 4, 'const': 500, # physical units 'pdrf_exponent': 4, 'pdrf_scale': 100000, 'soma_detection_threshold': 1100, # physical units 'soma_acceptance_threshold': 3500, # physical units 'soma_invalidation_scale': 1.0, 'soma_invalidation_const': 300, # physical units 'max_paths': None, # default None }, dust_threshold=50, anisotropy=(200,200,1000), # default True fix_branching=True, # default True fix_borders=True, # default True progress=True, # default False parallel=2, # <= 0 all cpu, 1 single process, 2+ multiprocess ) ends_dict = {} fov_ins_skel_array = np.zeros_like(fov_ins_array) ends_array = np.zeros_like(fov_ins_array) for label_ in skels: skel = skels[label_] coords = (skel.vertices / np.array([200, 200, 1000])).astype(int) fov_ins_skel_array[coords[:, 0], coords[:, 1], coords[:, 2]] = label_ coords = coords.tolist() edges = skel.edges.tolist() ftree = Tree() cur_ = edges[0][0] ftree.create_node(cur_, cur_, data = coords[0]) cur_list = [cur_] while(len(edges) > 0 and len(cur_list) > 0): _cur_list = [] edges_ = edges[:] #print(cur_list) for cur_ in cur_list: next_inds = np.where(np.array(edges_) == cur_)[0] if len(next_inds) == 0:continue for next_ind in next_inds: edge_ = edges_[next_ind] edges.remove(edge_) #print(cur_, edge_) if edge_[0] == cur_: next_ = edge_[-1] else: next_ = edge_[0] _cur_list.append(next_) ftree.create_node(next_, next_, data = coords[next_], parent = cur_) edges_ = edges[:] cur_list = _cur_list ends = [x.data for x in ftree.leaves()] ends.append(coords[0]) ends_dict[label_] = ends ends_ = np.array(ends) ends_array[ends_[:, 0], ends_[:, 1], ends_[:, 2]] = 1 #ends_array = dilation(ends_array, ball(1)) return fov_ins_skel_array, ends_array, ends_dict
class TreeT(object): def __init__(self, max_id=0): self.tree = Tree() def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None): # starts by ['(', 'pos'] pos_tag = line[1] if parent_id is None: pos_id = 0 else: pos_id = max_id max_id += 1 self.tree.create_node(pos_tag, pos_id, parent_id, TreeData()) parent_id = pos_id total_offset = 2 if line[2] != '(': # sub-tree is leaf # line[0:3] = ['(', 'pos', 'word', ')'] word_tag = line[2] self.tree.create_node(word_tag, leaf_id, parent_id, TreeData()) return 4, max_id, leaf_id + 1 line = line[2:] while line[0] != ')': offset, max_id, leaf_id = self.from_ptb_to_tree( line, max_id, leaf_id, parent_id) total_offset += offset line = line[offset:] return total_offset + 1, max_id, leaf_id def add_height(self, tree_dep): for n in self.tree.all_nodes(): n.data.leaves = [] for leaf in self.tree.leaves(): lid = leaf.identifier hid = tree_dep[lid] if hid == self.tree.root: self.tree[lid].data.height = self.tree.depth(self.tree[lid]) for cid in [ p for p in self.tree.paths_to_leaves() if lid in p ][0]: self.tree[cid].data.leaves += [lid] else: height = -1 cid = lid cond = True while cond: self.tree[cid].data.leaves += [lid] height += 1 cid = self.tree.parent(cid).identifier cid_leaves = [l.identifier for l in self.tree.leaves(cid)] cid_l_dep = [tree_dep[l] for l in cid_leaves if l != lid] cond = set(cid_l_dep).issubset(set(cid_leaves)) self.tree[lid].data.height = height x_nodes = [ n.identifier for n in self.tree.all_nodes() if n.data.leaves == [] ] for x_node in x_nodes[::-1]: min_id = min(self.tree.children(x_node), key=lambda c: c.data.height) _lid = min_id.data.leaves[0] self.tree[_lid].data.height += 1 self.tree[x_node].data.leaves += [_lid] return True def _from_tree_to_ptb(self, nid): nid = self.tree.subtree(nid).root if self.tree[nid].is_leaf(): return ' (' + self.tree[nid].tag + ' ' + self.tree[ nid].data.word + ')' res = ' (' + self.tree[nid].tag for c_nid in sorted(self.tree.children(nid), key=lambda x: x.identifier): res += self._from_tree_to_ptb(c_nid.identifier) return res + ')' def from_tree_to_ptb(self): return self._from_tree_to_ptb(self.tree.root) def from_tag_to_tree(self, tag, word, pos_id=0): parent_id = None for tag_nodes in tag: if tag_nodes[0] in [CL, CR]: c_side = tag_nodes[0] _tag_nodes = tag_nodes[1:] if len(tag_nodes) > 1 else [''] else: c_side = '' _tag_nodes = tag_nodes self.tree.create_node(_tag_nodes[0], pos_id, parent=parent_id, data=TreeData(comb_side=c_side)) parent_id = pos_id pos_id += 1 for tag_node in _tag_nodes[1:]: self.tree.create_node(tag_node[1:], pos_id, parent=parent_id, data=TreeData(miss_side=tag_node[0])) pos_id += 1 for l in self.tree.leaves(): if l.data.miss_side == '': l.data.word = word break return pos_id @memoize def is_combine_to(self, side): return self.tree[self.tree.root].data.comb_side == side @memoize def is_combine_right(self): return self.is_combine_to(CR) @memoize def is_combine_left(self): return self.is_combine_to(CL) @memoize def is_complete_tree(self): return all([n.data.miss_side == '' for n in self.tree.all_nodes()]) @memoize def get_missing_leaves_to(self, miss_val, side): return [ l.identifier for l in self.tree.leaves(self.tree.root) if l.data.miss_side == side and l.tag == miss_val ] @memoize def get_missing_leaves_left(self, miss_val): return self.get_missing_leaves_to(miss_val, L) @memoize def get_missing_leaves_right(self, miss_val): return self.get_missing_leaves_to(miss_val, R) @memoize def root_tag(self): return self.tree[self.tree.root].tag @memoize def 
is_no_missing_leaves(self): return all( [l.data.miss_side == '' for l in self.tree.leaves(self.tree.root)]) @memoize def combine_tree(self, _tree, comb_leaf): self.tree.paste(comb_leaf, _tree.tree) self.tree.link_past_node(comb_leaf) return self def tree_to_path(self, nid, path): # Stop condition if self.tree[nid].is_leaf(): path[nid] = [] return nid, self.tree[nid].data.height # Recursion flag = CR for child in self.tree.children(nid): cid = child.identifier leaf_id, height = self.tree_to_path(cid, path) if (height == 0): # Reached end of path can add flag path[leaf_id].insert(0, flag) # path[leaf_id].append(flag) if height > 0: path[leaf_id].insert(0, nid) # only single child will have height>0 # and its value will be the one that is returned # to the parent ret_leaf_id, ret_height = leaf_id, height - 1 # once we reached a height>0, it means that # this path includes the parent, and thus flag # direction should flip flag = CL return ret_leaf_id, ret_height def path_to_tags(self, path): tags = [] for p in path: _res = [] _p = copy.copy(p) if _p[0] in [CL, CR]: _res.append(_p[0]) _p = _p[1:] while _p[:-1]: el_p = _p.pop(0) _res.append(self.tree[el_p].tag) for c in self.tree.children(el_p): if c.identifier != _p[0]: _res.append(R + c.tag if c.identifier > _p[0] else L + c.tag) _res.append(self.tree[_p[0]].tag) tags.append(_res) return tags def path_to_words(self, path): return [self.tree[k].tag for k in path] def from_tree_to_tag(self): path = {} self.tree_to_path(self.tree.root, path) return { 'tags': self.path_to_tags(path.values()), 'words': self.path_to_words(path.keys()) } def from_ptb_to_tag(self, line, max_id, depend): self.from_ptb_to_tree(line, max_id) self.add_height(depend) path = {} self.tree_to_path(self.tree.root, path) return self.path_to_tags(path.values())
def change(tree): nidInternal = nidValid(tree) choices = [getChoice(tree, n) for n in nidInternal] n_choices = map(lambda L: sum([len(i) for i in L]), choices) choiceDic = { a: b for (a, b, c) in zip(nidInternal, choices, n_choices) if c > 1 } choices1 = list(choiceDic.keys()) nid = random.choice(choices1) p = tree[nid].data.shape[1] x0 = tree[nid].var s0 = tree[nid].split choices = choiceDic[nid] # choose nid to split if s0 in choices[x0 - 1]: choices[x0 - 1].remove(s0) # remove original split option choices2 = [i for i in range(p - 1) if len(choices[i]) > 0] # choose var to split x = random.choice(choices2) choices3 = choices[x] # choose value to split x += 1 s = random.choice(choices3) tree1 = Tree(tree, deep=True) pid = tree1[nid].bpointer sub = tree1.remove_subtree(nid) tags = recurTag(sub, nid) tags[0] = (nid, x, s) try: sub1 = genTree(sub[nid], tags) except IndexError: print(f'{mi} change {t}: {tags[0]}; unchangable') return tree if pid is not None: tree1.paste(pid, sub1) tree1[pid].fpointer = sorted(tree1[pid].fpointer) else: tree1 = sub1 nidInternal1 = set(nidValid(tree1)) choices1 = set(choices1) choices11 = nidInternal1.intersection(choices1) extra = nidInternal1 - choices1 n_choices = map(lambda L: sum([len(i) for i in L]), [getChoice(tree1, n) for n in extra]) choices11 = list(choices11) + [ a for (a, b) in zip(extra, n_choices) if b > 1 ] choices31 = getChoice(tree1, nid, x0)[x0 - 1] n31 = len(choices31) if (sub1[nid].var == sub[nid].var) and (s0 in choices31): n31 -= 1 rTransit = len(choices1) * len(choices3) / (len(choices11) * n31) rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves()) rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr()) r = rLike * rTransit * rStruct print(f'{mi} change {t}: {tags[0]}; r={r.round(4)}') if random.uniform(0, 1) < r: tree1.w2 = tree.w2 tree1.R = tree.R tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0] tree1.show() return tree1 return tree
#for i in range(m):
#    for l in trees[i].leaves():
#        idx = l.data.index
#        MM.loc[idx, i] = mumu

#%%
Like = zeros(T)
Rmse = zeros(T)
# = zeros(T)
for t in range(T):
    var_ratio = var / var_mu
    for mi in range(m):
        trees[mi].R = y - (MM.sum(axis=1) - MM.iloc[:, mi])
        tree = drawTree(trees[mi])
        g = any(map(drawM, tree.leaves()))
        #for l in tree.leaves(): drawM(l)
        trees[mi] = tree
        tdic[t * m + mi] = tree2dic(tree)
    yhat = MM.sum(axis=1).values
    Yhat[:, t] = yhat
    e = (y - yhat).values
    sse = e @ e
    Rmse[t] = sqrt(sse / n0)
    Like[t] = log(norm.pdf(e, scale=sqrt(var))).sum()
    b = ig2 + 0.5 * sse
    lamda = np.random.gamma(a, 1 / b)
    var = 1 / lamda
    Depth_mu[t] = array([tr.depth() for tr in trees]).mean()

yhat = Yhat[:, burn:].mean(axis=1)
    leaf = partition.split(vertex, partidx)
    subTree.create_node(leaf.PaintedVertices, leaf)
    if leaf.isatomic():
        return subTree
    # recurse onto children nodes to build partition tree depth first
    for v in leaf.Parts[leaf.nextsplitting()]:
        subTree.paste(leaf, branch(leaf, v, leaf.nextsplitting()))
    return subTree


from treelib import Node, Tree

tree = Tree()
tree.create_node(P0.PaintedVertices, P0)  # root node
if not P0.isatomic():
    for v in P0.Parts[P0.nextsplitting()]:
        tree.paste(P0, branch(P0, v, P0.nextsplitting()))
tree.show()

for node in tree.leaves():
    # print(node.identifier.permutation())
    P = node.identifier
    sG = P.applyautomorphism()
    print(lexifyedges(sG))

# P1 = tree.leaves()[0].identifier
# p = P1.permutation()
# G1 = P1.applyautomorphism()
class StepParse: def __init__(self): pass def load_step(self, step_filename): self.nauo_lines = [] self.prod_def_lines = [] self.prod_def_form_lines = [] self.prod_lines = [] self.filename = os.path.splitext(step_filename)[0] line_hold = '' line_type = '' # Find all search lines with open(step_filename) as f: for line in f: # TH: read pointer of lines as they are read, so if the file has text wrap it will notice and add it to the following lines index = re.search("#(.*)=", line) if index: # TH: if not none then it is the start of a line so read it # want to hold line until it has checked next line # if next line is a new indexed line then save previous line if line_hold: if line_type == 'nauo': self.nauo_lines.append(line_hold) elif line_type == 'prod_def': self.prod_def_lines.append(line_hold) elif line_type == 'prod_def_form': self.prod_def_form_lines.append(line_hold) elif line_type == 'prod': self.prod_lines.append(line_hold) line_hold = '' line_type = '' prev_index = True # TH remember previous line had an index if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line: line_hold = line.rstrip() line_type = 'nauo' elif ('PRODUCT_DEFINITION ' in line or 'PRODUCT_DEFINITION(' in line): line_hold = line.rstrip() line_type = 'prod_def' elif 'PRODUCT_DEFINITION_FORMATION' in line: line_hold = line.rstrip() line_type = 'prod_def_form' elif ('PRODUCT ' in line or 'PRODUCT(' in line): line_hold = line.rstrip() line_type = 'prod' else: prev_index = False #TH: if end of file and previous line was held if 'ENDSEC;' in line: if line_hold: if line_type == 'nauo': self.nauo_lines.append(line_hold) elif line_type == 'prod_def': self.prod_def_lines.append(line_hold) elif line_type == 'prod_def_form': self.prod_def_form_lines.append(line_hold) elif line_type == 'prod': self.prod_lines.append(line_hold) line_hold = '' line_type = '' else: #TH: if not end of file line_hold = line_hold + line.rstrip() self.nauo_refs = [] self.prod_def_refs = [] self.prod_def_form_refs = [] self.prod_refs = [] # TH: added 'replace(","," ").' 
to replace ',' with a space to make the spilt easier if there are not spaces inbetween the words' # Find all (# hashed) line references and product names # TH: it might be worth finding a different way of extracting data we do want rather than fixes to get rid of the data we don't for j, el_ in enumerate(self.nauo_lines): self.nauo_refs.append([ el.rstrip(',') for el in el_.replace(",", " ").replace("=", " ").split() if el.startswith('#') ]) for j, el_ in enumerate(self.prod_def_lines): self.prod_def_refs.append([ el.rstrip(',') for el in el_.replace(",", " ").replace("=", " ").split() if el.startswith('#') ]) for j, el_ in enumerate(self.prod_def_form_lines): self.prod_def_form_refs.append([ el.rstrip(',') for el in el_.replace(",", " ").replace("=", " ").split() if el.startswith('#') ]) for j, el_ in enumerate(self.prod_lines): self.prod_refs.append([ el.strip(',') for el in el_.replace(",", " ").replace( "(", " ").replace("=", " ").split() if el.startswith('#') ]) self.prod_refs[j].append(el_.split("'")[1]) # Get first two items in each sublist (as third is shape ref) # # First item is 'PRODUCT_DEFINITION' ref # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref self.prod_all_refs = [el[:2] for el in self.prod_def_refs] # Match up all references down to level of product name for j, el_ in enumerate(self.prod_all_refs): # Add 'PRODUCT_DEFINITION' ref for i, el in enumerate(self.prod_def_form_refs): if el[0] == el_[1]: el_.append(el[1]) break # Add names from 'PRODUCT_DEFINITION' lines for i, el in enumerate(self.prod_refs): if el[0] == el_[2]: el_.append(el[2]) break # Find all parent and child relationships (3rd and 2nd item in each sublist) self.parent_refs = [el[1] for el in self.nauo_refs] self.child_refs = [el[2] for el in self.nauo_refs] # Find distinct parts and assemblies via set operations; returns list, so no repetition of items self.all_type_refs = set(self.child_refs) | set(self.parent_refs) self.ass_type_refs = set(self.parent_refs) self.part_type_refs = set(self.child_refs) - set(self.parent_refs) #TH: find root node self.root_type_refs = set(self.parent_refs) - set(self.child_refs) # Create simple parts dictionary (ref + label) self.part_dict = {el[0]: el[3] for el in self.prod_all_refs} # self.part_dict_inv = {el[3]:el[0] for el in self.prod_all_refs} def show_values(self): # TH: basic testing, if needed these could be spilt up print(self.nauo_lines) print(self.prod_def_lines) print(self.prod_def_form_lines) print(self.prod_lines) print(self.nauo_refs) print(self.prod_def_refs) print(self.prod_def_form_refs) print(self.prod_refs) # HR: "create_dict" replaced by list comprehension elsewhere # # def create_dict(self): # # # TH: links nauo number with a name and creates dict # self.part_dict = {} # for part in self.all_type_refs: # for sublist in self.prod_def_refs: # if sublist[0] == part: # prod_loc = '#' + re.findall('\d+',sublist[1])[0] # pass # for sublist in self.prod_def_form_refs: # if sublist[0] == prod_loc: # prod_loc = '#' + str(re.findall('\d+',sublist[1])[0]) # pass # for sublist in self.prod_refs: # if sublist[0] == prod_loc: # part_name = sublist[2] # # self.part_dict[part] = part_name def create_tree(self): #TH: create tree diagram in newick format #TH: find root node self.tree = Tree() #TH: check if there are any parts to make a tree from, if not don't bother if self.part_dict == {}: return root_node_ref = list(self.root_type_refs)[0] # HR added part reference as data for later use self.tree.create_node(self.part_dict[root_node_ref], 0, data={'ref': 
root_node_ref}) #TH: created root node now fill in next layer #TH: create dict for tree, as each node needs a unique name i = [0] # Iterates through nodes self.tree_dict = {} self.tree_dict[i[0]] = root_node_ref def tree_next_layer(self, parent): root_node = self.tree_dict[i[0]] for line in self.nauo_refs: if line[1] == root_node: i[0] += 1 self.tree_dict[i[0]] = str(line[2]) # HR added part reference as data for later use self.tree.create_node(self.part_dict[line[2]], i[0], parent=parent, data={'ref': str(line[2])}) tree_next_layer(self, i[0]) tree_next_layer(self, 0) self.appended = False self.get_levels() def get_levels(self): # Initialise dict and get first level (leaves) self.levels = {} self.levels_set_p = set() self.levels_set_a = set() self.leaf_ids = [el.identifier for el in self.tree.leaves()] self.all_ids = [el for el in self.tree.nodes] self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids) self.part_level = 1 def do_level(self, tree_level): # Get all nodes within this level node_ids = [ el for el in self.tree.nodes if self.tree.level(el) == tree_level ] for el in node_ids: # If leaf, then n_p = 1 and n_a = 1 if el in self.leaf_ids: self.levels[el] = {} self.levels[el]['n_p'] = self.part_level self.levels[el]['n_a'] = self.part_level # If assembly, then get all children and sum all parts + assemblies else: # Get all children of node and sum levels child_ids = self.tree.is_branch(el) child_sum_p = 0 child_sum_a = 0 for el_ in child_ids: child_sum_p += self.levels[el_]['n_p'] child_sum_a += self.levels[el_]['n_a'] self.levels[el] = {} self.levels[el]['n_p'] = child_sum_p self.levels[el]['n_a'] = child_sum_a + 1 self.levels_set_p.add(child_sum_p) self.levels_set_a.add(child_sum_a + 1) # Go up through tree levels and populate lattice level dict for i in range(self.tree.depth(), -1, -1): do_level(self, i) self.create_lattice() self.levels_p_sorted = sorted(list(self.levels_set_p)) self.levels_a_sorted = sorted(list(self.levels_set_a)) # Function to return dictionary of item IDs for each lattice level def get_levels_inv(list_in, key): #Initialise levels_inv = {} levels_inv[self.part_level] = [] for el in list_in: levels_inv[el] = [] for k, v in self.levels.items(): levels_inv[v[key]].append(k) return levels_inv self.levels_p_inv = get_levels_inv(self.levels_p_sorted, 'n_p') self.levels_a_inv = get_levels_inv(self.levels_a_sorted, 'n_a') def get_all_children(self, id_): ancestors = [el.identifier for el in self.tree.children(id_)] parents = ancestors while parents: children = [] for parent in parents: children = [el.identifier for el in self.tree.children(parent)] ancestors.extend(children) parents = children return ancestors def create_lattice(self): # Create lattice self.g = nx.DiGraph() self.default_colour = 'r' # Get root node and set parent to -1 to maintain data type of "parent" # Set position to top/middle node_id = self.tree.root label_text = self.tree.get_node(node_id).tag self.g.add_node(node_id, parent=-1, label=label_text, colour=self.default_colour) # Do nodes from treelib "nodes" dictionary for key in self.tree.nodes: # Exclude root if key != self.tree.root: parent_id = self.tree.parent(key).identifier label_text = self.tree.get_node(key).tag # Node IDs same as for tree self.g.add_node(key, parent=parent_id, label=label_text, colour=self.default_colour) # Do edges from nodes for key in self.tree.nodes: # Exclude root if key != self.tree.root: parent_id = self.tree.parent(key).identifier self.g.add_edge(key, parent_id) # Escape if only one node # HR 6/3/20 QUICK BUG 
FIX: SINGLE-NODE TREE DOES NOT PLOT # IMPROVE LATER; SHOULD BE PART OF A GENERAL METHOD if self.tree.size() == 1: id_ = [el.identifier for el in self.tree.leaves()] self.g.nodes[id_[-1]]['pos'] = (0, 0) return # Get set of parents of leaf nodes leaf_parents = set( [self.tree.parent(el).identifier for el in self.leaf_ids]) # For each leaf_parent, set position of leaf nodes sequentially i = 0 no_leaves = len(self.tree.leaves()) for el in leaf_parents: for el_ in self.tree.is_branch(el): child_ids = [el.identifier for el in self.tree.leaves()] if el_ in child_ids: self.g.nodes[el_]['pos'] = ((i / (no_leaves)), 1) i += 1 # To set plot positions of nodes from lattice levels # --- # Traverse upwards from leaves for el in sorted(list(self.levels_set_a)): # Get all nodes at that level node_ids = [k for k, v in self.levels.items() if v['n_a'] == el] # Get all positions of children of that node # and set position as mean value of them for el_ in node_ids: child_ids = self.tree.is_branch(el_) pos_sum = 0 for el__ in child_ids: pos_ = self.g.nodes[el__]['pos'][0] pos_sum += pos_ pos_sum = pos_sum / len(child_ids) self.g.nodes[el_]['pos'] = (pos_sum, el) def print_tree(self): try: self.tree.show() except: self.create_tree() self.tree.show() def tree_to_json(self, save_to_file=False, filename='file', path=''): #TH: return json format tree, can also save to file if self.tree.size() != 0: data = self.tree.to_json() j = json.loads(data) if save_to_file == True: if path: file_path = os.path.join(path, filename) else: file_path = filename with open(file_path + '.json', 'w') as outfile: json.dump(j, outfile) return data else: print("no tree to print") return
class DependencyReader:
    """DependencyReader object"""

    def __init__(self):
        self.tempDirectoryPath = mkdtemp(dir=".")
        self.tree = Tree()
        self.dependencies = {}
        self.graphRelationships = []

    def getPom(self, pomPath):
        shutil.copy(pomPath, self.tempDirectoryPath)
        os.chdir(self.tempDirectoryPath)

    def getDependencies(self):
        mavenTreeOutput = subprocess.Popen(
            'mvn org.apache.maven.plugins:maven-dependency-plugin:RELEASE:tree -DoutputType=tgf',
            stdout=subprocess.PIPE,
            shell=True)
        while True:
            line = mavenTreeOutput.stdout.readline().rstrip()
            if not line or re.search(r"BUILD SUCCESS", line):
                break
            match = re.match(r"\[INFO\]\s(\d*)\s*(.*):(.*):(\w+):([0-9\.]*)", line)
            if match:
                if not match.group(1) in self.dependencies.keys():
                    self.dependencies[match.group(1)] = DependencyNode(
                        match.group(2), match.group(3), match.group(5), match.group(1))
                    if not self.tree.leaves():
                        self.tree.create_node(match.group(1),
                                              match.group(1),
                                              data=self.dependencies[match.group(1)])
                    self.dependencies[match.group(1)].get('jar', self.tempDirectoryPath)
            match = re.match(r"\[INFO\]\s(\d*)\s(\d*)", line)
            if match and match.group(2):
                self.graphRelationships.append((match.group(1), match.group(2)))

    def relateDependencies(self):
        while self.graphRelationships:
            for item in self.graphRelationships:
                node = self.tree.get_node(item[0])
                if node is not None:
                    parent = self.dependencies[item[0]]
                    child = self.dependencies[item[1]]
                    self.tree.create_node(child.referenceId,
                                          child.referenceId,
                                          parent=parent.referenceId,
                                          data=child)
                    self.graphRelationships.remove(item)

    def scanDependencies(self):
        # Need to run oneshot on each package to get identifiers,
        # unless dosocsv2 is updated to create identifiers on scan instead
        for node in self.tree.expand_tree(mode=Tree.DEPTH):
            treeNode = self.tree.get_node(node)
            subprocess.call('dosocs2 oneshot ' + treeNode.data.jarName, shell=True)

    def createRelationships(self):
        # Pass packages as relationships to the new dosocsv2 command
        self.recursiveRelationship(self.tree.root)

    def recursiveRelationship(self, parent):
        for node in self.tree.is_branch(parent):
            parentNode = self.tree.get_node(parent)
            childNode = self.tree.get_node(node)
            subprocess.call('dosocs2 packagerelate ' + parentNode.data.jarName
                            + ' ' + childNode.data.jarName, shell=True)
            self.recursiveRelationship(node)

    def retrieve_dependencies(self, jarName):
        if jarName is None:
            root = self.tree.get_node(self.tree.root)
            root = root.data.jarName
        else:
            root = jarName
        tgfOutput = subprocess.Popen('dosocs2 dependencies ' + root,
                                     stdout=subprocess.PIPE,
                                     shell=True)
        count = 0
        tree = Tree()
        dependencies = []
        relationships = []
        while True:
            line = tgfOutput.stdout.readline()
            if not line:
                break
            match = re.match(r"(\d+) - (.*)", line)
            if match:
                if count == 0:
                    count = count + 1
                    tree.create_node(match.group(2), match.group(1))
                else:
                    dependencies.append((match.group(2), match.group(1)))
            match = re.match(r"(\d+) (\d+)", line)
            if match:
                relationships.append((match.group(1), match.group(2)))
        if not relationships:
            print("No child relationships for " + jarName)
            return None
        while relationships:
            for item in relationships:
                node = tree.get_node(item[0])
                if node is not None:
                    rel = [item for item in relationships
                           if int(item[0]) == int(node.identifier)]
                    if rel is not None:
                        rel = rel[0]
                        dep = [item for item in dependencies
                               if int(item[1]) == int(rel[1])]
                        if dep is not None:
                            dep = dep[0]
                            tree.create_node(dep[0], dep[1], parent=node.identifier)
                            relationships.remove(rel)
                            dependencies.remove(dep)
        tree.show()
        if jarName is None:
            os.chdir(os.pardir)
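# A minimal, self-contained sketch of the same parsing idea as
# DependencyReader above: turning TGF-style output ("id label" node lines,
# "parent child" edge lines) into a treelib Tree, without shelling out to mvn
# or dosocs2. The sample text and the parse_tgf name are illustrative only.
import re
from treelib import Tree

SAMPLE_TGF = """\
1 com.example:app:jar:1.0.0
2 org.apache.commons:commons-lang3:jar:3.12.0
3 com.google.guava:guava:jar:31.1-jre
#
1 2
1 3
"""


def parse_tgf(text):
    nodes, edges = {}, []
    for line in text.splitlines():
        edge_match = re.match(r"(\d+)\s+(\d+)$", line)
        node_match = re.match(r"(\d+)\s+(\S+)", line)
        if edge_match:              # edge line: parent child
            edges.append((edge_match.group(1), edge_match.group(2)))
        elif node_match:            # node line: id label
            nodes[node_match.group(1)] = node_match.group(2)

    tree = Tree()
    remaining = list(edges)
    # Treat the first node listed as the root artifact
    root_id = next(iter(nodes))
    tree.create_node(nodes[root_id], root_id)
    # Attach a child only once its parent is already in the tree
    while remaining:
        for parent, child in list(remaining):
            if tree.get_node(parent) is not None:
                if tree.get_node(child) is None:
                    tree.create_node(nodes[child], child, parent=parent)
                remaining.remove((parent, child))
    return tree


if __name__ == "__main__":
    parse_tgf(SAMPLE_TGF).show()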
class RIAC(AbstractTeacher):
    def __init__(self, mins, maxs, seed, env_reward_lb, env_reward_ub,
                 max_region_size=200, alp_window_size=None,
                 nb_split_attempts=50, sampling_in_leaves_only=False,
                 min_region_size=None, min_dims_range_ratio=1 / 6,
                 discard_ratio=1 / 4):
        AbstractTeacher.__init__(self, mins, maxs, env_reward_lb, env_reward_ub, seed)

        # Maximal number of (task, reward) pairs a region can hold before splitting
        self.maxlen = max_region_size

        self.alp_window = self.maxlen if alp_window_size is None else alp_window_size

        # Initialize Regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(self.mins, self.maxs, dtype=np.float32)]
        self.regions_alp = [0.]
        self.tree.create_node('root',
                              'root',
                              data=Region(maxlen=self.maxlen,
                                          r_t_pairs=[
                                              deque(maxlen=self.maxlen + 1),
                                              deque(maxlen=self.maxlen + 1)
                                          ],
                                          bounds=self.regions_bounds[-1],
                                          alp=self.regions_alp[-1]))
        self.nb_dims = len(mins)
        self.nb_split_attempts = nb_split_attempts

        # Whether task sampling uses parent and child regions (False) or only child regions (True)
        self.sampling_in_leaves_only = sampling_in_leaves_only

        # Additional tricks to original RIAC, enforcing splitting rules

        # 1 - Minimum population required for both children when splitting --> set to 1 to cancel
        self.minlen = self.maxlen / 20 if min_region_size is None else min_region_size

        # 2 - minimum children region size (compared to initial range of each dimension)
        # Set min_dims_range_ratio to 1/np.inf to cancel
        self.dims_ranges = self.maxs - self.mins
        self.min_dims_range_ratio = min_dims_range_ratio

        # 3 - If after nb_split_attempts, no split is valid, flush oldest points of parent region
        # If 1- and 2- are canceled, this will be canceled since any split will be valid
        self.discard_ratio = discard_ratio

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_alps = []
        self.update_nb = -1
        self.split_iterations = []

        self.hyperparams = locals()

    def compute_alp(self, sub_region):
        if len(sub_region[0]) > 2:
            cp_window = min(len(sub_region[0]), self.alp_window)  # not completely window
            half = int(cp_window / 2)
            # print(str(cp_window) + 'and' + str(half))
            first_half = np.array(sub_region[0])[-cp_window:-half]
            snd_half = np.array(sub_region[0])[-half:]
            diff = first_half.mean() - snd_half.mean()
            cp = np.abs(diff)
        else:
            cp = 0
        alp = np.abs(cp)
        return alp

    def split(self, nid):
        # Try nb_split_attempts splits on region corresponding to node <nid>
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_bounds = None
        best_sub_regions = None
        is_split = False
        for i in range(self.nb_split_attempts):
            sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
            sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]

            # repeat until the two sub regions contain at least minlen of the mother region
            while len(sub_reg1[0]) < self.minlen or len(sub_reg2[0]) < self.minlen:
                # decide on dimension
                dim = self.random_state.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]
                valid_bounds = True

                if np.any(bounds1.high - bounds1.low < self.dims_ranges * self.min_dims_range_ratio):
                    valid_bounds = False
                if np.any(bounds2.high - bounds2.low < self.dims_ranges * self.min_dims_range_ratio):
                    valid_bounds = valid_bounds and False

                # perform split in sub regions
                sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                for i, task in enumerate(reg.r_t_pairs[1]):
                    if bounds1.contains(task):
                        sub_reg1[1].append(task)
                        sub_reg1[0].append(reg.r_t_pairs[0][i])
                    else:
                        sub_reg2[1].append(task)
                        sub_reg2[0].append(reg.r_t_pairs[0][i])
                sub_regions = [sub_reg1, sub_reg2]

            # compute alp
            alp = [self.compute_alp(sub_reg1), self.compute_alp(sub_reg2)]

            # compute score
            split_score = len(sub_reg1) * len(sub_reg2) * np.abs(alp[0] - alp[1])
            if split_score >= best_split_score and valid_bounds:
                is_split = True
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds

        if is_split:
            # add new nodes to tree
            for i, (r_t_pairs, bounds) in enumerate(zip(best_sub_regions, best_bounds)):
                self.tree.create_node(identifier=self.tree.size(),
                                      parent=nid,
                                      data=Region(self.maxlen,
                                                  r_t_pairs=r_t_pairs,
                                                  bounds=bounds,
                                                  alp=alp[i]))
        else:
            assert len(reg.r_t_pairs[0]) == (self.maxlen + 1)
            reg.r_t_pairs[0] = deque(
                islice(reg.r_t_pairs[0], int(self.maxlen * self.discard_ratio),
                       self.maxlen + 1))
            reg.r_t_pairs[1] = deque(
                islice(reg.r_t_pairs[1], int(self.maxlen * self.discard_ratio),
                       self.maxlen + 1))
        return is_split

    def add_task_reward(self, node, task, reward):
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task):  # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children:  # if task in region, task is in one sub-region
                self.add_task_reward(n, task, reward)

            need_split = reg.add(task, reward, children == [])  # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)

    def episodic_update(self, task, reward, is_success):
        self.update_nb += 1

        # Add new (task, reward) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_reward(root, task, reward)  # Will update self.nodes_to_split if needed
        assert len(self.nodes_to_split) <= 1

        # Split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])  # Execute the split
            if new_split:
                # Update list of regions_bounds
                if self.sampling_in_leaves_only:
                    self.regions_bounds = [n.data.bounds for n in self.tree.leaves()]
                else:
                    self.regions_bounds = [n.data.bounds for n in self.tree.all_nodes()]

        # Recompute ALPs of modified nodes
        for nid in self.nodes_to_recompute:
            node = self.tree.get_node(nid)
            reg = node.data
            reg.alp = self.compute_alp(reg.r_t_pairs)

        # Collect regions data (regions' ALP and regions' (task, reward) pairs)
        all_nodes = self.tree.all_nodes() if not self.sampling_in_leaves_only else self.tree.leaves()
        self.regions_alp = []
        self.r_t_pairs = []
        for n in all_nodes:
            self.regions_alp.append(n.data.alp)
            self.r_t_pairs.append(n.data.r_t_pairs)

        # Book-keeping
        if new_split:
            self.all_boxes.append(copy.copy(self.regions_bounds))
            self.all_alps.append(copy.copy(self.regions_alp))
            self.split_iterations.append(self.update_nb)
        assert len(self.regions_alp) == len(self.regions_bounds)
        return new_split, None

    def sample_random_task(self):
        return self.regions_bounds[0].sample()  # First region is root region

    def sample_task(self):
        mode = self.random_state.rand()
        if mode < 0.1:
            # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                self.sampled_tasks.append(self.sample_random_task())
            else:
                self.sampled_tasks.append(
                    self.non_exploratory_task_sampling()["task"])
        elif mode < 0.3:
            # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.sample_random_task())
        else:
            # "mode 1" (70%) -> proportional sampling on regions based on ALP
            # and then random task in selected region
            region_id = proportional_choice(self.regions_alp,
                                            self.random_state,
                                            eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1].astype(np.float32)

    def non_exploratory_task_sampling(self):
        # 1 - Sample region proportionally to its ALP
        region_id = proportional_choice(self.regions_alp,
                                        self.random_state,
                                        eps=0.0)
        # 2 - Retrieve (task, reward) pair with lowest reward
        worst_task_idx = np.argmin(self.r_t_pairs[region_id][0])
        # 3 - Mutate task by a small amount (using Gaussian centered on task, with 0.1 std)
        task = self.random_state.normal(
            self.r_t_pairs[region_id][1][worst_task_idx].copy(), 0.1)
        # clip to stay within region (add small epsilon to avoid falling in multiple regions)
        task = np.clip(task,
                       self.regions_bounds[region_id].low + 1e-5,
                       self.regions_bounds[region_id].high - 1e-5)
        return {
            "task": task,
            "infos": {
                "bk_index": len(self.all_boxes) - 1,
                "task_infos": region_id
            }
        }

    def dump(self, dump_dict):
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_alps'] = self.all_alps
        # dump_dict['riac_params'] = self.hyperparams
        return dump_dict

    @property
    def nb_regions(self):
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        return self.regions_bounds
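# A minimal, self-contained sketch of the two numeric ingredients RIAC relies
# on above: absolute learning progress (ALP) over a window of rewards, and
# picking a region index proportionally to ALP. Function names and the example
# numbers are illustrative only, not the benchmark implementation.
import numpy as np


def compute_alp(rewards, alp_window=20):
    """ALP = |mean of the most recent rewards - mean of the rewards just before them|."""
    rewards = np.asarray(rewards, dtype=np.float64)
    if len(rewards) <= 2:
        return 0.0
    cp_window = min(len(rewards), alp_window)
    half = cp_window // 2
    first_half = rewards[-cp_window:-half]
    snd_half = rewards[-half:]
    return float(np.abs(first_half.mean() - snd_half.mean()))


def proportional_choice(values, rng, eps=0.0):
    """Pick an index with probability proportional to its value (uniform if all zero)."""
    values = np.asarray(values, dtype=np.float64) + eps
    if values.sum() == 0:
        return rng.integers(len(values))
    return rng.choice(len(values), p=values / values.sum())


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    # A region whose rewards recently jumped has high ALP and is sampled more often
    improving = list(np.zeros(10)) + list(np.ones(10))
    flat = list(np.ones(20))
    alps = [compute_alp(improving), compute_alp(flat)]
    print(alps)                            # [1.0, 0.0]
    print(proportional_choice(alps, rng))  # almost surely 0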
            tree.create_node(key, key)
            added.add(key)
            node_dict.pop(key)
            break
    tree_list.append(tree)

for tree in tree_list:
    tree.save2file("Processed_Skeleton_Trees.txt")

#######################################################################################################################

# Identify end nodes (leaves):
leaf_list = []
for i in range(len(tree_list)):
    tree = tree_list[i]
    leaves = tree.leaves(nid=None)
    for leaf in leaves:
        leaf = leaf.identifier
        leaf_list.append(leaf)

# Identify paths to leaves:
paths_list = []
for i in range(len(tree_list)):
    tree = tree_list[i]
    paths = tree.paths_to_leaves()
    paths_list.append(paths)

# Identify branch points:
branch_list = list(set([x for x in source_list if source_list.count(x) > 1]))

# Remove somas from branch list:
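# A minimal, self-contained sketch of the three skeleton queries used above on
# a toy tree: end nodes via tree.leaves(), root-to-leaf paths via
# tree.paths_to_leaves(), and branch points as nodes with more than one child.
# The toy skeleton is illustrative only.
from treelib import Tree

skeleton = Tree()
skeleton.create_node("soma", "soma")
skeleton.create_node("n1", "n1", parent="soma")
skeleton.create_node("n2", "n2", parent="n1")        # branch point
skeleton.create_node("tip_a", "tip_a", parent="n2")
skeleton.create_node("tip_b", "tip_b", parent="n2")

leaf_ids = [leaf.identifier for leaf in skeleton.leaves()]
paths = skeleton.paths_to_leaves()
branch_ids = [n.identifier for n in skeleton.all_nodes()
              if len(skeleton.children(n.identifier)) > 1]

print(leaf_ids)    # e.g. ['tip_a', 'tip_b']
print(paths)       # e.g. [['soma', 'n1', 'n2', 'tip_a'], ['soma', 'n1', 'n2', 'tip_b']]
print(branch_ids)  # ['n2']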