def conversation_regarding_language(cursor):
    conversation_amount = postgres_queries.find_conversation_number(cursor)
    conversation_list = list()
    depth_dict = dict()
    depth_dict_long = dict()
    depth_dict_short = dict()
    number_of_tweets_dict = dict()
    test_i = 0
    for i in range(0, conversation_amount + 1, 1):
        conversation_tree = Tree()
        conversation = postgres_queries.find_conversation(i, cursor)
        test_i += len(conversation)
        for tweet in conversation:
            if tweet[2] is None and tweet[5] is True:
                conversation_tree.create_node(tweet[0], tweet[0])
                tweets_in_conversation = list()
                build_conversation_lang(tweet[0], conversation, conversation_tree, tweets_in_conversation)
        depth = conversation_tree.depth() + 1
        number_of_tweets = len(conversation_tree.all_nodes())
        # short/long
        if number_of_tweets >= 20:
            if depth in depth_dict_long:
                depth_dict_long[depth] += 1
            else:
                depth_dict_long[depth] = 1
        else:
            if depth in depth_dict_short:
                depth_dict_short[depth] += 1
            else:
                depth_dict_short[depth] = 1
        if number_of_tweets in number_of_tweets_dict:
            number_of_tweets_dict[number_of_tweets] += 1
        else:
            number_of_tweets_dict[number_of_tweets] = 1
        if depth in depth_dict:
            depth_dict[depth] += 1
        else:
            depth_dict[depth] = 1
        # check if conversation_tree is null - don't add
        if len(conversation_tree.all_nodes()) != 0:
            conversation_list.append(conversation_tree)
    # number = 0
    new_tweet_list_id = list()
    for con in conversation_list:
        nodes = con.all_nodes()
        for node in nodes:
            new_tweet_list_id.append(int(node.tag))
        # number += len(con.all_nodes())
    # print len(new_tweet_list_id)
    # for tweet_id in new_tweet_list_id:
    #     print tweet_id
    return new_tweet_list_id, conversation_list
class Chain:
    def __init__(self):
        self.root = Tree()
        self.root.create_node(0, 0)  # Genesis block

    # Adds a block to a chain
    def add_block(self, block):
        node = self.root.create_node(block.epoch, hash(block), block.hash)
        node.data = block

    # Prints the chain graphically as a tree
    def print_chain(self):
        self.root.show()

    # Returns the blocks of the whole blockchain, as a list
    def return_nodes_data(self):
        nodes = self.root.all_nodes()
        nodes_data = []
        for i in range(1, len(nodes)):
            nodes_data.append(nodes[i].data)
        if len(nodes_data) == 0:
            nodes_data.append(0)
        return nodes_data

    # Returns the list of identifiers (numbers) of the leaves of the tree (chain)
    def leaves(self):
        leaves = self.root.leaves(nid=None)
        leaves_identifiers = []
        for i in range(len(leaves)):
            leaves_identifiers.append(leaves[i].identifier)
        return leaves_identifiers

    # returns all the epochs associated with the nodes of the blockchain
    '''
    def return_nodes(self):
        nodes = self.root.all_nodes()
        nodes_identifiers = []
        for i in range(len(nodes)):
            nodes_identifiers.append(nodes[i].tag)
        return nodes_identifiers
    '''

    # returns the block (object) with the highest epoch in the Chain
    def block_max_epoch(self):
        nodes = self.root.all_nodes()
        max_epoch = 0
        block_max = 0
        for i in range(len(nodes)):
            if nodes[i].tag > max_epoch:
                max_epoch = nodes[i].tag
                block_max = nodes[i].data
        return block_max
def expand_night_nodes(tree: treelib.Tree):
    """
    Expands night nodes by creating all the appropriate children nodes.
    """
    for node in tree.all_nodes():
        node_id = node.identifier
        path, gs, expanded = tree[node_id].data[0:3]
        if expanded or gs.time == 0:
            continue
        if winner(gs) != 0:
            tree[node_id].data = (path, gs, True)
            continue
        outcomes = night_outcomes(gs)
        for key, val in outcomes.items():
            s = "".join([to_string(x) for x in key[0:3]]) + (str(key[-1]) if len(key) > 3 else "")
            tree.create_node(f"{s} {str(val)}", node_id + "_" + s, node_id,
                             data=(key, val, False, 0.0))
        tree[node_id].data = (path, gs, True)
    return
def endpoint_cal(swc_p, unit, sep=","):
    """ generate a multiBranch Tree from the swc file """
    print(unit, sep)
    coords, labels, ids, pars = coords_get(swc_p, unit, sep)
    # coords += 1
    if len(coords) == 0:
        print("{} is something wrong".format(swc_p))
        sys.exit(0)
    ftree = Tree()
    ftree.create_node(ids[0], ids[0], data=coords[0])
    for coord_, id_, par_ in zip(coords[1:], ids[1:], pars[1:]):
        # print(id_, par_)
        ftree.create_node(id_, id_, data=coord_, parent=par_)
    endpoint_coords = [x.data for x in ftree.leaves()]
    endpoint_coords.append(coords[0])
    branch_coords = [x.data for x in ftree.all_nodes() if len(ftree.children(x.tag)) >= 2]
    endpoint_coords = np.array(endpoint_coords)
    branch_coords = np.array(branch_coords)
    coords = np.array(coords)
    return endpoint_coords, branch_coords, coords, ftree
def __init__(self, mol, atom0, atom1, depth=5):
    self.mol = mol
    tree = Tree()
    bond_order = mol.GetBondBetweenAtoms(
        atom0.GetIdx(), atom1.GetIdx()).GetBondTypeAsDouble()
    tree.create_node(tag=[atom0.GetAtomicNum(), bond_order],
                     identifier=atom0.GetIdx(), data=atom0)
    tree.create_node(tag=[atom1.GetAtomicNum(), bond_order],
                     identifier=atom1.GetIdx(), data=atom1,
                     parent=atom0.GetIdx())
    for _ in range(depth):
        for node in tree.all_nodes():
            if node.is_leaf():
                for atom in node.data.GetNeighbors():
                    tree_id = tree._identifier
                    if atom.GetIdx() != node.predecessor(tree_id=tree_id):
                        order = mol.GetBondBetweenAtoms(
                            atom.GetIdx(), node.data.GetIdx()).GetBondTypeAsDouble()
                        identifier = atom.GetIdx()
                        while tree.get_node(identifier) is not None:
                            identifier += len(mol.GetAtoms())
                        tree.create_node(tag=[atom.GetAtomicNum(), order],
                                         identifier=identifier, data=atom,
                                         parent=node.identifier)
    self.tree = tree
def sum_orbits(orbital_tree: Tree) -> int:
    sum_total_orbits = 0
    for node in orbital_tree.all_nodes():
        sum_total_orbits += orbital_tree.depth(node)
    return sum_total_orbits
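# A minimal usage sketch for sum_orbits (only treelib's Tree is assumed): each
# node's depth is its number of direct and indirect orbits, so summing depths
# over all nodes gives the total orbit count.
from treelib import Tree

orbit_map = Tree()
orbit_map.create_node("COM", "COM")
orbit_map.create_node("B", "B", parent="COM")
orbit_map.create_node("C", "C", parent="B")
orbit_map.create_node("D", "D", parent="C")

print(sum_orbits(orbit_map))  # 0 + 1 + 2 + 3 = 6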
def reset_ids(t: tl.Tree) -> tl.Tree:
    for node in t.all_nodes():
        if node.is_leaf():
            t.update_node(node.identifier, tag=node.tag, identifier=node.tag)
        else:
            t.update_node(node.identifier, identifier=uid())
    return t
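# Hedged usage sketch for reset_ids. The uid() helper is not shown above, so a
# hypothetical stand-in is defined here; the original implementation may differ.
import uuid
import treelib as tl

def uid() -> str:
    # hypothetical stand-in for the uid() helper assumed by reset_ids
    return uuid.uuid4().hex

t = tl.Tree()
t.create_node("root", "n0")
t.create_node("leafA", "n1", parent="n0")
t.create_node("leafB", "n2", parent="n0")
t = reset_ids(t)
print(t.contains("leafA"), t.contains("leafB"))  # leaves are now addressable by their tags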
class BlockChain:
    def __init__(self):
        self.root = Tree()
        self.root.create_node(0, 0)  # Genesis block

    # Adds a block to a blockchain
    def add_block(self, block):
        node = self.root.create_node(block.epoch, hash(block), block.hash)
        node.data = block

    # Prints the blockchain graphically
    def print_chain(self):
        self.root.show()

    # Returns the list containing all the blocks of the entire blockchain
    def return_nodes_data(self):
        nodes = self.root.all_nodes()
        nodes_data = []
        for i in range(1, len(nodes)):
            nodes_data.append(nodes[i].data)
        if len(nodes_data) == 0:
            nodes_data.append(0)
        return nodes_data

    # Returns the list of identifiers of the leaves of the tree (blockchain)
    def leaves(self):
        leaves = self.root.leaves(nid=None)
        leaves_identifiers = []
        for i in range(len(leaves)):
            leaves_identifiers.append(leaves[i].identifier)
        return leaves_identifiers

    # returns the Block object with the highest epoch in the blockchain
    def block_max_epoch(self):
        nodes = self.root.all_nodes()
        max_epoch = 0
        block_max = 0
        for i in range(len(nodes)):
            if nodes[i].tag > max_epoch:
                max_epoch = nodes[i].tag
                block_max = nodes[i].data
        return block_max
def types_of_conversation():
    conversation_amount = postgres_queries.find_annotated_conversation_number()
    conversation_list = list()
    depth_dict = dict()
    depth_dict_long = dict()
    depth_dict_short = dict()
    number_of_tweets_dict = dict()
    for i in range(0, conversation_amount + 1, 1):
        conversation_tree = Tree()
        conversation = postgres_queries.find_conversation(i)
        for tweet in conversation:
            if tweet[1] is None:
                conversation_tree.create_node(tweet[0], tweet[0])
                build_conversation(tweet[0], conversation, conversation_tree)
        depth = conversation_tree.depth() + 1
        number_of_tweets = len(conversation_tree.all_nodes())
        # short/long
        if number_of_tweets >= 20:
            if depth in depth_dict_long:
                depth_dict_long[depth] += 1
            else:
                depth_dict_long[depth] = 1
        else:
            if depth in depth_dict_short:
                depth_dict_short[depth] += 1
            else:
                depth_dict_short[depth] = 1
        if number_of_tweets in number_of_tweets_dict:
            number_of_tweets_dict[number_of_tweets] += 1
        else:
            number_of_tweets_dict[number_of_tweets] = 1
        if depth in depth_dict:
            depth_dict[depth] += 1
        else:
            depth_dict[depth] = 1
        conversation_list.append(conversation_tree)
    # print depth_dict
    print('Depth of a conversation')
    for depth, count in depth_dict.items():
        print(depth, '\t', count)
    print('Number of tweets in a conversation')
    for number, count in number_of_tweets_dict.items():
        print(number, '\t', count)
    print('Depth of a long conversation')
    for depth, count in depth_dict_long.items():
        print(depth, '\t', count)
    print('Depth of a short conversation')
    for depth, count in depth_dict_short.items():
        print(depth, '\t', count)
    return conversation_list
class DecisionTreeClassifier(object):
    def __init__(self):
        self.tree = Tree()

    def split(self, node, data, attr):
        '''algorithm to recursively split nodes by best attribute until decision tree is constructed'''
        attr_vals = get_values(data, attr)  # gets all possible values for attribute in data
        for val in attr_vals:
            new_data = data[data[attr] == val]  # splits data by attribute val
            new_attr = get_best_attribute(new_data)  # this is the next attribute to split on
            new_node = self.tree.create_node(val, len(self.tree.all_nodes()), node.identifier,
                                             data=DecisionTreeNode(data=new_data, attribute=new_attr))
            if not is_pure(new_data):  # recursively calls split if data isn't pure
                self.split(new_node, new_data, new_attr)

    def fit(self, X, y):
        '''Build a decision tree classifier from the training set (X, y)'''
        data = pd.concat([X, y], axis=1)
        attr = get_best_attribute(data)
        node = self.tree.create_node("Root", "root", data=DecisionTreeNode(data, attr))
        self.split(node, node.data.data, attr)

    def predict(self, X):
        '''Predict class value for X'''
        preds = []
        for row in X.itertuples():  # iterates through each row of X
            node_cur = self.tree['root']  # initializes current node to root
            while not node_cur.is_leaf():
                attr = node_cur.data.attribute  # attribute that the current node will be split on
                val = getattr(row, attr)  # value of attribute at current row
                node_cur_children = node_cur.fpointer  # returns list of children node ids
                for node_id in node_cur_children:  # iterates through all children of node_cur
                    node = self.tree[node_id]
                    if node.tag == val:  # if node's tag matches val
                        node_cur = node
            preds.append(node_cur.data.get_pred())
        return preds
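# Hedged usage sketch for the classifier above. get_values, get_best_attribute,
# is_pure and DecisionTreeNode are helpers assumed to exist elsewhere in the
# original project; the toy frame below is purely illustrative.
import pandas as pd

X = pd.DataFrame({
    "outlook": ["sunny", "sunny", "rain", "rain"],
    "windy": ["no", "yes", "no", "yes"],
})
y = pd.Series(["yes", "no", "yes", "no"], name="play")

clf = DecisionTreeClassifier()
clf.fit(X, y)
print(clf.predict(X))  # predicted labels, one per row of X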
def crossOver(individualA, individualB):
    tree = None
    while tree is None or tree.depth(tree.get_node(tree.root)) > TREE_MAX_DEPTH:
        treeA = Tree(tree=individualA.tree, deep=True)
        treeB = Tree(tree=individualB.tree, deep=True)
        regenerate_ids(treeA)
        regenerate_ids(treeB)
        removedNode = random.choice(treeA.all_nodes())
        addedNode = random.choice(treeB.all_nodes())
        addedSubtree = Tree(tree=treeB.subtree(addedNode.identifier), deep=True)
        if treeA.root == removedNode.identifier:
            tree = addedSubtree
        else:
            parent = treeA.parent(removedNode.identifier)
            treeA.remove_subtree(removedNode.identifier)
            treeA.paste(parent.identifier, addedSubtree)
            tree = treeA
    return Individual(tree)
def conversation_regarding_language():  # with width and depth
    conversation_amount = postgres_queries.find_annotated_conversation_number()
    conversation_list = list()
    depth_dict = dict()
    depth_dict_long = dict()
    depth_dict_short = dict()
    number_of_tweets_dict = dict()
    test_i = 0
    for i in range(0, conversation_amount + 1, 1):
        conversation_tree = Tree()
        conversation = postgres_queries.find_conversation(i)
        test_i += len(conversation)
        for tweet in conversation:
            if tweet[1] is None and tweet[5] is True:
                conversation_tree.create_node(tweet[0], tweet[0])
                build_conversation_lang(tweet[0], conversation, conversation_tree)
        depth = conversation_tree.depth() + 1
        number_of_tweets = len(conversation_tree.all_nodes())
        # short/long
        if number_of_tweets >= 20:
            if depth in depth_dict_long:
                depth_dict_long[depth] += 1
            else:
                depth_dict_long[depth] = 1
        else:
            if depth in depth_dict_short:
                depth_dict_short[depth] += 1
            else:
                depth_dict_short[depth] = 1
        if number_of_tweets in number_of_tweets_dict:
            number_of_tweets_dict[number_of_tweets] += 1
        else:
            number_of_tweets_dict[number_of_tweets] = 1
        if depth in depth_dict:
            depth_dict[depth] += 1
        else:
            depth_dict[depth] = 1
        conversation_list.append(conversation_tree)
    number = 0
    new_tweet_list_id = list()
    for con in conversation_list:
        nodes = con.all_nodes()
        for node in nodes:
            new_tweet_list_id.append(node.tag)
        number += len(con.all_nodes())
    return new_tweet_list_id, conversation_list
def createTree(showDescript=False):
    tree = Tree()
    with open('savedFiles/rulesTxt/%s.txt' % fileName, 'r') as fin:
        contentList = fin.readlines()
    currParent = None
    for line in contentList:
        if not line.startswith('V-'):
            # new category here, change parent
            currNode, parentNode = map(lambda s: s.strip(), line.split(','))
            currParent = currNode  # set the new parent for following lines
            # now add node to tree
            if parentNode == 'NONE':
                parentNode = None
            tree.create_node(currNode, currNode, parent=parentNode)
        else:
            # is a rule, add to current category
            ruleNum, ruleDescript = parseRule(line)  # TODO
            if showDescript:
                value = "%s: %s" % (ruleNum, ruleDescript)
            else:
                value = ruleNum
            tree.create_node(value, ruleNum, parent=currParent)
    # now strip away the \n randomly added to the end of value in every node
    for nodeObj in tree.all_nodes():
        nodeObj.tag = nodeObj.tag.strip()
    return tree
def __call__(self, inputs, words, dep, is_train=True):
    """
    :param xs: a list of ngrams (or words if win is set to 1)
    :return: embeddings looked from tables
    """
    list = eval(dep)
    tree = Tree()
    finish = False
    err_node = []
    root_index = 0
    while 1:
        if finish:
            break
        if len(tree.all_nodes()) == len(list):
            finish = True
        for i in range(0, len(list)):
            arr = list[i]
            parentIdx = arr[1]
            nodeIdx = arr[2]
            if not tree.contains(nid=nodeIdx):
                if i == 0:
                    tree.create_node(words[nodeIdx - 1], identifier=nodeIdx)
                    root_index = nodeIdx - 1
                else:
                    if tree.contains(nid=parentIdx):
                        tree.create_node(words[nodeIdx - 1], identifier=nodeIdx, parent=parentIdx)
    H = []
    for idx in range(0, len(inputs)):
        h = self.expr_for_tree(xt=inputs[idx], tree=tree, node=tree.get_node(idx + 1), is_train=is_train)
        H.append(h)
    return H, root_index
def build_tree(arg): # read parameters start = time.time() dist_matrix_file = arg[0] cls_file = arg[1] tree_dir = arg[2] ksize = arg[3] params = arg[4] alpha_ratio = params[0] minsize = params[1] maxsize = params[2] max_cls_size = params[3] # save genomes info fna_seq = bidict.bidict() # : 1 fna_path = {} # read dist matrix (represented by similarity: 1-dist) # output: dist, fna_path, fna_seq f = open(dist_matrix_file, "r") lines = f.readlines() f.close() index = 0 d = lines[0].rstrip().split("\t")[1:] bac_label = 0 for i in lines[0].rstrip().split("\t")[1:]: temp = i[i.rfind('/') + 1:].split(".")[0] fna_seq[temp] = index fna_path[index] = i index += 1 dist = [] for line in lines[1:]: dist.append( [np.array(list(map(float, line.rstrip().split("\t")[1:])))]) dist = np.concatenate(dist) # read initial clustering results. fna_mapping, from 1 for indexing f = open(cls_file, 'r') lines = f.readlines() f.close() fna_mapping = defaultdict(set) for line in lines: temp = line.rstrip().split("\t") for i in temp[2].split(","): fna_mapping[int(temp[0])].add(fna_seq[i]) if (len(lines) == 1): tree = Tree() kmer_sta = defaultdict(int) T0 = Node(identifier=list(fna_mapping.keys())[0]) tree.add_node(T0) kmer_sta = defaultdict(int) kmer_index_dict = bidict.bidict() kmer_index = 1 alpha_ratio = 1 Lv = set() for i in fna_mapping[T0.identifier]: for seq_record in SeqIO.parse(fna_path[i], "fasta"): temp = str(seq_record.seq) for k in range(0, len(temp) - ksize): forward = temp[k:k + ksize] reverse = seqpy.revcomp(forward) for kmer in [forward, reverse]: try: kmer_sta[kmer_index_dict[kmer]] += 1 except KeyError: kmer_index_dict[kmer] = kmer_index kmer_sta[kmer_index] += 1 kmer_index += 1 alpha = len(fna_mapping[T0.identifier]) * alpha_ratio for x in kmer_sta: if (kmer_sta[x] >= alpha): Lv.add(x) print(T0.identifier, len(Lv)) # save2file kmerlist = set() pkl.dump(tree, open(tree_dir + '/tree.pkl', 'wb')) f = open(tree_dir + "/tree_structure.txt", "w") os.system("mkdir " + tree_dir + "/kmers") os.system("mkdir " + tree_dir + "/overlapping_info") f.write("%d\t" % T0.identifier) f.close() os.system(f'cp {cls_file} {tree_dir}/') f = open(tree_dir + "/reconstructed_nodes.txt", "w") f.close() if (len(Lv) > maxsize): Lv = set(random.sample(Lv, maxsize)) kmerlist = Lv length = len(Lv) f = open(tree_dir + "/kmers/" + str(T0.identifier), "w") for j in Lv: f.write("%d " % j) f.close() f = open(tree_dir + "/node_length.txt", "w") f.write("%d\t%d\n" % (T0.identifier, length)) kmer_mapping = {} index = 0 f = open(tree_dir + "/kmer.fa", "w") for i in kmerlist: f.write(">1\n") f.write(kmer_index_dict.inv[i]) kmer_mapping[i] = index index += 1 f.write("\n") f.close() # change index files = os.listdir(tree_dir + "/kmers") for i in files: f = open(tree_dir + "/kmers/" + i, "r") lines = f.readlines() if (len(lines) == 0): continue d = lines[0].rstrip().split(" ") d = map(int, d) f = open(tree_dir + "/kmers/" + i, "w") for j in d: f.write("%d " % kmer_mapping[j]) f.close() end = time.time() print( '- The total running time of tree-based indexing struture building is ', str(end - start), ' s\n') return # initially build tree cls_dist, mapping, tree, depths, depths_mapping = hierarchy( fna_mapping, dist) # initially extract k-mers kmer_index_dict = bidict.bidict() kmer_index = 1 Lv = defaultdict(set) spec = defaultdict(set) # k-mers <= alpha leaves = tree.leaves() for i in leaves: kmer_index = extract_kmers(fna_mapping[i.identifier], fna_path, ksize, kmer_index_dict, kmer_index, Lv, spec, tree_dir, alpha_ratio, i.identifier) end = 
time.time() print('- The total running time of k-mer extraction is ', str(end - start), ' s\n') start = time.time() # leaf nodes check recls_label = 0 leaves_check = [] check_waitlist = reversed(leaves) while (True): if (recls_label): cls_dist, mapping, tree, depths, depths_mapping = hierarchy( fna_mapping, dist) leaves = tree.leaves() temp = {} temp2 = [] for i in check_waitlist: if (i in fna_mapping): temp2.append(i) check_waitlist = temp2.copy() for i in check_waitlist: temp[tree.get_node(i)] = depths[tree.get_node(i)] check_waitlist = [] a = sorted(temp.items(), key=lambda x: x[1], reverse=True) for i in a: check_waitlist.append(i[0]) for i in fna_mapping: if (i not in Lv): kmer_index = extract_kmers(fna_mapping[i], fna_path, ksize, kmer_index_dict, kmer_index, Lv, spec, tree_dir, alpha_ratio, i) higher_union = defaultdict(set) for i in check_waitlist: diff, diff_nodes = get_leaf_union(depths[i], higher_union, depths_mapping, Lv, spec, i) kmer_t = Lv[i.identifier] - diff for j in diff_nodes: kmer_t = kmer_t - Lv[j.identifier] for j in diff_nodes: kmer_t = kmer_t - spec[j.identifier] print(str(i.identifier) + " checking", end="\t") print(len(kmer_t)) if (len(kmer_t) < minsize): leaves_check.append(i) if (len(leaves_check) > 0): recls_label = 1 else: break # re-clustering check_waitlist = [] while (recls_label == 1): cluster_id = max(list(fna_mapping.keys())) + 1 check_waitlist.append(cluster_id) leaf_a = leaves_check[0].identifier row_index = mapping[leaf_a] column_index = cls_dist[row_index].argmax() leaf_b = mapping.inv[column_index] # (leaf_a, leaf_b) temp2 = fna_mapping[leaf_a] | fna_mapping[leaf_b] print(cluster_id, leaf_a, leaf_b, temp2) del fna_mapping[leaf_a], fna_mapping[leaf_b] if (leaf_a in Lv): del Lv[leaf_a], spec[leaf_a] if (leaf_b in Lv): del Lv[leaf_b], spec[leaf_b] del leaves_check[0] if (tree.get_node(leaf_b) in leaves_check): leaves_check.remove(tree.get_node(leaf_b)) temp1 = [ np.concatenate([[cls_dist[row_index]], [cls_dist[column_index]]]).max(axis=0) ] cls_dist = np.concatenate([cls_dist, temp1], axis=0) temp1 = np.append(temp1, -1) temp1 = np.vstack(temp1) cls_dist = np.concatenate([cls_dist, temp1], axis=1) cls_dist = np.delete(cls_dist, [row_index, column_index], axis=0) cls_dist = np.delete(cls_dist, [row_index, column_index], axis=1) # change mapping del mapping[leaf_a], mapping[leaf_b] pending = list(fna_mapping.keys()) pending.sort() for i in pending: if (mapping[i] > min([row_index, column_index]) and mapping[i] < max([row_index, column_index])): mapping[i] -= 1 elif (mapping[i] > max([row_index, column_index])): mapping[i] -= 2 fna_mapping[cluster_id] = temp2 mapping[cluster_id] = len(cls_dist) - 1 if (len(leaves_check) == 0): break del higher_union # rebuild identifiers all_nodes = tree.all_nodes() all_leaves_id = set([]) leaves = set(tree.leaves()) for i in leaves: all_leaves_id.add(i.identifier) id_mapping = bidict.bidict() index = 1 index_internal = len(leaves) + 1 for i in all_nodes: if (recls_label == 0): id_mapping[i.identifier] = i.identifier elif (i in leaves): id_mapping[i.identifier] = index index += 1 else: id_mapping[i.identifier] = index_internal index_internal += 1 leaves_identifier = list(range(1, len(leaves) + 1)) all_identifier = list(id_mapping.values()) all_identifier.sort() # save2file f = open(tree_dir + "/tree_structure.txt", "w") os.system("mkdir " + tree_dir + "/kmers") os.system("mkdir " + tree_dir + "/overlapping_info") for nn in all_identifier: i = id_mapping.inv[nn] f.write("%d\t" % id_mapping[i]) if (i == 
all_nodes[0].identifier): f.write("N\t") else: f.write("%d\t" % id_mapping[tree.parent(i).identifier]) if (nn in leaves_identifier): f.write("N\t") else: [child_a, child_b] = tree.children(i) f.write("%d %d\t" % (id_mapping[child_a.identifier], id_mapping[child_b.identifier])) if (len(fna_mapping[i]) == 1): temp = list(fna_mapping[i])[0] temp = fna_seq.inv[temp] f.write("%s" % temp) f.write("\n") f.close() f = open(tree_dir + "/hclsMap_95_recls.txt", "w") for nn in leaves_identifier: i = id_mapping.inv[nn] f.write("%d\t%d\t" % (nn, len(fna_mapping[i]))) temp1 = list(fna_mapping[i]) for j in temp1: temp = fna_seq.inv[j] if (j == temp1[-1]): f.write("%s\n" % temp) else: f.write("%s," % temp) f.close() end = time.time() print('- The total running time of re-clustering is ', str(end - start), ' s\n') start = time.time() # build indexing structure kmerlist = set([]) # all kmers used length = {} overload_label = 0 if (len(tree.leaves()) > max_cls_size): overload_label = 1 # from bottom to top (unique k-mers) uniq_temp = defaultdict(set) rebuilt_nodes = [] descendant = defaultdict(set) # including itself ancestor = defaultdict(set) descendant_leaves = defaultdict(set) ancestor[all_nodes[0].identifier].add(all_nodes[0].identifier) for i in all_nodes[1:]: ancestor[i.identifier] = ancestor[tree.parent( i.identifier).identifier].copy() ancestor[i.identifier].add(i.identifier) for i in reversed(all_nodes): print(str(id_mapping[i.identifier]) + " k-mer removing...") if (i in leaves): uniq_temp[i.identifier] = Lv[i.identifier] descendant_leaves[i.identifier].add(i.identifier) else: (child_a, child_b) = tree.children(i.identifier) descendant[i.identifier] = descendant[ child_a.identifier] | descendant[child_b.identifier] descendant_leaves[i.identifier] = descendant_leaves[ child_a.identifier] | descendant_leaves[child_b.identifier] uniq_temp[i.identifier] = uniq_temp[ child_a.identifier] & uniq_temp[child_b.identifier] uniq_temp[child_a.identifier] = uniq_temp[ child_a.identifier] - uniq_temp[i.identifier] uniq_temp[child_b.identifier] = uniq_temp[ child_b.identifier] - uniq_temp[i.identifier] descendant[i.identifier].add(i.identifier) all_nodes_id = set(id_mapping.keys()) # remove overlapping for i in reversed(all_nodes): print(str(id_mapping[i.identifier]) + " k-mer set building...") # no difference with sibling, subtree and ancestors if (i == all_nodes[0]): kmer_t = uniq_temp[i.identifier] else: diff = {} temp = all_nodes_id - descendant[i.identifier] - set([ tree.siblings(i.identifier)[0].identifier ]) - ancestor[i.identifier] for j in temp: diff[j] = len(uniq_temp[j]) a = sorted(diff.items(), key=lambda x: x[1], reverse=True) kmer_t = uniq_temp[i.identifier] for j in a: k = j[0] kmer_t = kmer_t - uniq_temp[k] # remove special k-mers temp = all_leaves_id - descendant_leaves[i.identifier] diff = {} for j in temp: diff[j] = len(spec[j]) a = sorted(diff.items(), key=lambda x: x[1], reverse=True) for j in a: k = j[0] kmer_t = kmer_t - spec[k] if (len(kmer_t) < minsize and overload_label == 0): rebuilt_nodes.append(i) print("%d waiting for reconstruction..." 
% id_mapping[i.identifier]) else: if (len(kmer_t) > maxsize): kmer_t = set(random.sample(kmer_t, maxsize)) f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w") for j in kmer_t: f.write("%d " % j) f.close() length[i] = len(kmer_t) kmerlist = kmerlist | kmer_t del uniq_temp # rebuild nodes overlapping = defaultdict(dict) intersection = defaultdict(set) higher_union = defaultdict(set) del_label = {} for i in leaves: del_label[i.identifier] = [0, 0] for i in rebuilt_nodes: print(str(id_mapping[i.identifier]) + " k-mer set rebuilding...") kmer_t = get_intersect(intersection, descendant_leaves[i.identifier], Lv, del_label, i.identifier) diff = get_diff(higher_union, descendant_leaves, depths, all_nodes, i, Lv, spec, del_label) for j in diff: kmer_t = kmer_t - j lower_leaves = set([]) for j in leaves: if (depths[j] < depths[i]): lower_leaves.add(j) if (len(kmer_t) > maxsize): kmer_overlapping_sta = defaultdict(int) for j in lower_leaves: kmer_o = Lv[j.identifier] & kmer_t for k in kmer_o: kmer_overlapping_sta[k] += 1 temp = sorted(kmer_overlapping_sta.items(), key=lambda kv: (kv[1], kv[0])) kmer_t = set([]) for j in range(0, maxsize): kmer_t.add(temp[j][0]) nkmer = {} f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w") index = 0 for j in kmer_t: f.write("%d " % j) nkmer[j] = index index += 1 length[i] = len(kmer_t) kmerlist = kmerlist | kmer_t # save overlapping info for j in lower_leaves: temp = Lv[j.identifier] & kmer_t if (len(temp) > 0): ii = id_mapping[i.identifier] jj = id_mapping[j.identifier] overlapping[jj][ii] = set([]) for k in temp: overlapping[jj][ii].add(nkmer[k]) delete(Lv, spec, del_label) for i in overlapping: f = open(tree_dir + "/overlapping_info/" + str(i), "w") f1 = open(tree_dir + "/overlapping_info/" + str(i) + "_supple", "w") count = -1 for j in overlapping[i]: if (len(overlapping[i]) != 0): f.write("%d\n" % j) for k in overlapping[i][j]: f.write("%d " % k) f.write("\n") count += 2 f1.write("%d %d\n" % (j, count)) f.close() f1.close() # final saving f = open(tree_dir + "/reconstructed_nodes.txt", "w") for i in rebuilt_nodes: f.write("%d\n" % id_mapping[i.identifier]) f.close() f = open(tree_dir + "/node_length.txt", "w") for nn in all_identifier: i = id_mapping.inv[nn] f.write("%d\t%d\n" % (nn, length[tree[i]])) f.close() kmer_mapping = {} index = 0 f = open(tree_dir + "/kmer.fa", "w") for i in kmerlist: f.write(">1\n") f.write(kmer_index_dict.inv[i]) kmer_mapping[i] = index index += 1 f.write("\n") f.close() # change index files = os.listdir(tree_dir + "/kmers") for i in files: f = open(tree_dir + "/kmers/" + i, "r") lines = f.readlines() if (len(lines) == 0): continue d = lines[0].rstrip().split(" ") d = map(int, d) f = open(tree_dir + "/kmers/" + i, "w") for j in d: f.write("%d " % kmer_mapping[j]) f.close() end = time.time() print( '- The total running time of tree-based indexing struture building is ', str(end - start), ' s\n')
class Blockchain(object): def __init__(self, genesis): # TODO: figure out if genesis should be passed in or created here # self.tinput = tinput self.blockCount = 0 self.blockchain = Tree() self.genesis = genesis self.addGenesisBlock(genesis) #Add the genesis block to chain def addGenesisBlock(self, genesis): self.blockchain.create_node("Genesis Block" + " ID: " + genesis.proofOfWork[:12], genesis.proofOfWork, data=genesis) def printBlockchain(self): self.blockchain.show() def addBlock(self, block): # TODO: run proof of work verification before adding block # Add block to chain & return true if POW valid # Else return false self.blockCount += 1 self.blockchain.create_node("Block " + str(self.blockCount) + " ID: " + block.proofOfWork[:12], block.proofOfWork, parent=block.prevBlockHash, data=block) def getGenesisID(self): return self.blockchain.root def getLongestChainBlocks(self): allNodes = self.blockchain.all_nodes() forkNum = 0 #number of leaves at longest branch treeDepth = self.blockchain.depth() longestPathLeaves = [ ] #WIll hold leaves with treeDepth depth ie longest branch(es) for node in allNodes: currentDepth = self.blockchain.depth(node) if (currentDepth == treeDepth): forkNum += 1 longestPathLeaves.append(node) return forkNum, longestPathLeaves def blockchainLength(self): # returns the depth of the tree ie the length of # the longest chain return self.blockchain.depth() def numBlocks(self): return self.blockchain.size() def printChain(self, chain): chain.show(data_property="humanID") def tailBlocks(self, chain): leaves = chain.leaves() print("Num leaves" + str(len(leaves))) print(leaves) def checkBlock(self): # Check the proof work work # return true if proof of work is valid # else rerturn false print("printing block") def createBlockchainGraph(self, outfilename): print("creating graph") self.blockchain.to_graphviz(filename=outfilename + '.gv', shape=u'box', graph=u'digraph') g = Source.from_file(outfilename + '.gv') g.render() def createBlockchainImg(self, outfilename): print("creating graph") self.blockchain.to_graphviz(filename=outfilename + '.gv', shape=u'box', graph=u'digraph') g = Source.from_file(outfilename + '.png') g.render()
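# Hedged usage sketch for the Blockchain class above. Block is hypothetical:
# only the attributes the class relies on are modelled here (proofOfWork is
# used as the node identifier, prevBlockHash as the parent identifier).
import hashlib
from dataclasses import dataclass

@dataclass
class Block:
    payload: str
    prevBlockHash: str

    @property
    def proofOfWork(self):
        return hashlib.sha256((self.payload + self.prevBlockHash).encode()).hexdigest()

genesis = Block("genesis", "")
chain = Blockchain(genesis)
chain.addBlock(Block("tx-1", genesis.proofOfWork))
chain.printBlockchain()
print(chain.blockchainLength())  # 1: genesis plus one child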
class BasicTree: def __init__(self, vehsInfo): self.tree = Tree() self.root = self.tree.create_node("Root", "root") # root node self.vehsInfo = vehsInfo self.vehList = list(vehsInfo.keys()) self.i = 1 def _build(self, currentNode, vehList): ''' :param vehList: A dict, keys is the set of vehicles, value is a tuple which represents (lane, position) :param currentNode: The current node in the tree :return: None ''' s = [currentNode.tag.find(vid) for vid in vehList] # the quit contidion in recursion if (np.array(s) >= 0).all(): return for vehId in vehList: if vehId not in currentNode.tag: if currentNode.is_root: prefix = currentNode.tag.replace("Root", "") else: prefix = currentNode.tag self.tree.create_node(prefix + vehId + "-", prefix + vehId, parent=currentNode) for node in self.tree.all_nodes(): if node.is_leaf(): self._build(currentNode=node, vehList=vehList) def _prune(self): laneId = [value[0] for value in self.vehsInfo.values()] sortedList = [] for i in list(set(laneId)): lane_info = {k: v[1] for k, v in self.vehsInfo.items() if v[0] == i} # Vehicles in front are at the front of the lane sortedList.append([vid[0] for vid in sorted(lane_info.items(), key=itemgetter(1), reverse=True)]) pruneList = [sublist for sublist in sortedList if len(sublist) > 1] for subList in pruneList: for index in range(1, len(subList)): # first, prune th subtree which begin with illegal vehicle id self.tree.remove_subtree(subList[index]) # second, delete the nodes which match the illegal pattern pattern = subList[index] + ".*" + subList[0] for node in self.tree.all_nodes(): if re.search(pattern, node.tag): try: self.tree.remove_node(node.identifier) except: pass def build(self): self._build(self.root, self.vehList) self._prune() def show(self): self.tree.show() def _leaves(self): ''' :return: All the plan for vehicle passing currently. ''' all_nodes = self.tree.all_nodes() return [node for node in all_nodes if node.is_leaf()] def legal_orders(self): leaves = self._leaves() orders = [] for pattern in leaves: # upToRight.1-leftToBelow.18-belowToRight.2-belowToRight.3- tmp = pattern.tag.split("-") try: tmp.remove('') except: pass if len(tmp) == self.tree.depth(): orders.append(tmp) return orders
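# Hedged usage sketch for BasicTree. vehsInfo maps a vehicle id to a
# (lane, position) tuple; vehicles sharing a lane must keep their front-to-back
# order, which is what _prune enforces. The ids and lanes below are made up.
vehsInfo = {
    "A.1": ("lane0", 35.0),  # ahead of A.2 on lane0
    "A.2": ("lane0", 20.0),
    "B.1": ("lane1", 30.0),
}
basic_tree = BasicTree(vehsInfo)
basic_tree.build()
print(basic_tree.legal_orders())  # passing orders that keep A.1 ahead of A.2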
tree.create_node(tag=json_data[0].get("mid"),
                 identifier=json_data[0].get("mid"),
                 data=Info(json_data[0], param=param))
uu = 1000 if len(json_data) >= 1000 else len(json_data)
# print('uu:', uu)
for j in range(1, uu):
    try:
        tree.create_node(tag=json_data[j].get("mid"),
                         identifier=json_data[j].get("mid"),
                         parent=json_data[j].get("parent"),
                         data=Info(json_data[j], param=param))
    except:
        pass
# a single propagation tree has been built
# simplify the propagation tree
print('before simplification: tree_depth:', tree.depth(), 'tree_nodes:', len(tree.all_nodes()))
tree = simplify_tree(tree)
print('***********after simplification: tree_depth:', tree.depth(), 'tree_nodes:', len(tree.all_nodes()))
trees.append(tree)
# print(eid, "---trees simplified")
# avg_tree_size += tree.size()
# avg_trees_size.append(int(avg_tree_size/data_array.shape[0]))
# plt.plot(params, avg_trees_size, 'ro-')
# plt.xlabel('alpha')
# plt.ylabel('trees avg size')
# plt.show()
# plt.savefig('./img/img_1.jpg')
# # extract the features of the original news post and write them to a file
# feature = extract_features(Info(json_data[0]))
parent_check[right] = left
parent_check_reverse[left] = right
nodeids.append(left)
nodeids.append(right)

for nodeid in nodeids:
    if nodeid in parent_check:
        continue
    else:
        rootid = nodeid

unique_ids = set(nodeids)
tree.create_node(tag=rootid, identifier=rootid)
while len(unique_ids) != len(tree.all_nodes()):
    for rightNode in list(parent_check.keys()):
        if tree.get_node(parent_check[rightNode]) is not None and tree.get_node(rightNode) is None:
            tree.create_node(tag=rightNode, identifier=rightNode,
                             parent=tree.get_node(parent_check[rightNode]))

for node in tree.all_nodes():
    sum += tree.depth(node)
print(sum)


def intersection(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    return lst3


sanIndex = 0
def tree_build_from_list(containers): """ Build a tree based on a unsorted list. Build a tree of containers based on an unsorted list of containers. Example: -------- >>> containers = [ { "childContainerKey": null, "configlets": [], "devices": [], "imageBundle": "", "key": "root", "name": "Tenant", "parentName": null }, { "childContainerKey": null, "configlets": [ "veos3-basic-configuration" ], "devices": [ "veos-1" ], "imageBundle": "", "key": "container_43_840035860469981", "name": "staging", "parentName": "Tenant" }] >>> print(tree_build_from_list(containers=containers)) {"Tenant": {"children": [{"Fabric": {"children": [{"Leaves": {"children": ["MLAG01", "MLAG02"]}}, "Spines"]}}]}} Parameters ---------- containers : dict, optional Container topology to create on CVP, by default None Returns ------- json tree topology """ # Create tree object tree = Tree() # Create the base node previously_created = list() # Create root node to mimic CVP behavior tree.create_node("Tenant", "Tenant") # Iterate for first level of containers directly attached under root. for cvp_container in containers: if cvp_container['parentName'] is None: continue elif cvp_container['parentName'] in ['Tenant']: previously_created.append(cvp_container['name']) tree.create_node(cvp_container['name'], cvp_container['name'], parent=cvp_container['parentName']) # Loop since expected tree is not equal to number of entries in container topology while len(tree.all_nodes()) < len(containers): for cvp_container in containers: if tree.contains( cvp_container['parentName'] ): # and cvp_container['parentName'] not in ['Tenant'] try: tree.create_node(cvp_container['name'], cvp_container['name'], parent=cvp_container['parentName']) except: # noqa E722 continue return tree.to_json()
class SAGG_BRIAC(): def __init__(self, min, max, temperature=20): # example --> min: [-1,-1] max: [1,1] assert len(min) == len(max) self.maxlen = 200 self.window_cp = 200 self.minlen = self.maxlen / 20 self.maxregions = 80 # init regions' tree self.tree = Tree() self.regions_bounds = [Box(min, max, dtype=np.float32)] self.interest = [0.] self.tree.create_node('root','root',data=Region(maxlen=self.maxlen, cps_gs=[deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)], bounds=self.regions_bounds[-1], interest=self.interest[-1])) self.nb_dims = len(min) self.temperature = temperature self.nb_split_attempts = 50 self.max_difference = 0.2 self.init_size = max - min self.ndims = len(min) self.mode_3_noise = 0.1 # book-keeping self.sampled_tasks = [] self.all_boxes = [] self.all_interests = [] self.update_nb = 0 self.split_iterations = [] def compute_interest(self, sub_region): if len(sub_region[0]) > self.minlen: # TRICK NB 4 cp_window = min(len(sub_region[0]), self.window_cp) # not completely window half = int(cp_window / 2) # print(str(cp_window) + 'and' + str(half)) first_half = np.array(sub_region[0])[-cp_window:-half] snd_half = np.array(sub_region[0])[-half:] diff = first_half.mean() - snd_half.mean() cp = np.abs(diff) else: cp = 0 interest = np.abs(cp) return interest def split(self, nid): # try nb_split_attempts splits reg = self.tree.get_node(nid).data best_split_score = 0 best_abs_interest_diff = 0 best_bounds = None best_sub_regions = None is_split = False for i in range(self.nb_split_attempts): sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)] sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)] # repeat until the two sub regions contain at least minlen of the mother region TRICK NB 1 while len(sub_reg1[0]) < self.minlen or len(sub_reg2[0]) < self.minlen: # decide on dimension dim = np.random.choice(range(self.nb_dims)) threshold = reg.bounds.sample()[dim] bounds1 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32) bounds1.high[dim] = threshold bounds2 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32) bounds2.low[dim] = threshold bounds = [bounds1, bounds2] valid_bounds = True if np.any(bounds1.high - bounds1.low < self.init_size / 15): # to enforce not too small boxes TRICK NB 2 valid_bounds = False if np.any(bounds2.high - bounds2.low < self.init_size / 15): valid_bounds = valid_bounds and False # perform split in sub regions sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)] sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)] for i, task in enumerate(reg.cps_gs[1]): if bounds1.contains(task): sub_reg1[1].append(task) sub_reg1[0].append(reg.cps_gs[0][i]) else: sub_reg2[1].append(task) sub_reg2[0].append(reg.cps_gs[0][i]) sub_regions = [sub_reg1, sub_reg2] # compute interest interest = [self.compute_interest(sub_reg1), self.compute_interest(sub_reg2)] # compute score split_score = len(sub_reg1) * len(sub_reg2) * np.abs(interest[0] - interest[1]) if split_score >= best_split_score and valid_bounds: # TRICK NB 3, max diff #and np.abs(interest[0] - interest[1]) >= self.max_difference / 8 is_split = True best_abs_interest_diff = np.abs(interest[0] - interest[1]) best_split_score = split_score best_sub_regions = sub_regions best_bounds = bounds if is_split: if best_abs_interest_diff > self.max_difference: self.max_difference = best_abs_interest_diff # add new nodes to tree for i, (cps_gs, bounds) in enumerate(zip(best_sub_regions, best_bounds)): self.tree.create_node(parent=nid, 
data=Region(self.maxlen, cps_gs=cps_gs, bounds=bounds, interest=interest[i])) else: #print("abort mission") # TRICK NB 6, remove old stuff if can't find split assert len(reg.cps_gs[0]) == (self.maxlen + 1) reg.cps_gs[0] = deque(islice(reg.cps_gs[0], int(self.maxlen / 4), self.maxlen + 1)) reg.cps_gs[1] = deque(islice(reg.cps_gs[1], int(self.maxlen / 4), self.maxlen + 1)) return is_split def merge(self, all_nodes): # get a list of children pairs parent_children = [] for n in all_nodes: if not n.is_leaf(): # if node is a parent children = self.tree.children(n.identifier) if children[0].is_leaf() and children[1].is_leaf(): # both children must be leaves for an easy remove parent_children.append([n, children]) # [parent, [child1, child2]] # sort each pair of children by their summed interest parent_children.sort(key=lambda x: np.abs(x[1][0].data.interest - x[1][1].data.interest), reverse=False) # remove useless pair child1 = parent_children[0][1][0] child2 = parent_children[0][1][1] # print("just removed {} and {}, daddy is: {}, childs: {}".format(child1.identifier, child2.identifier, # parent_children[0][0].identifier, # self.tree.children( # # print("bef") # parent_children[0][0].identifier))) # print([n.identifier for n in self.tree.all_nodes()]) self.tree.remove_node(child1.identifier) self.tree.remove_node(child2.identifier) # print("aff remove {} and {}".format(child1.identifier), child2.identifier) # print([n.identifier for n in self.tree.all_nodes()]) # remove 1/4 of parent to avoid falling in a splitting-merging loop dadta = parent_children[0][0].data # hahaha! dadta.cps_gs[0] = deque(islice(dadta.cps_gs[0], int(self.maxlen / 4), self.maxlen + 1)) dadta.cps_gs[1] = deque(islice(dadta.cps_gs[1], int(self.maxlen / 4), self.maxlen + 1)) self.nodes_to_recompute.append(parent_children[0][0].identifier) # remove child from recompute list if they where touched when adding the current task if child1.identifier in self.nodes_to_recompute: self.nodes_to_recompute.pop(self.nodes_to_recompute.index(child1.identifier)) if child2.identifier in self.nodes_to_recompute: self.nodes_to_recompute.pop(self.nodes_to_recompute.index(child2.identifier)) def add_task_comp(self, node, task, comp): reg = node.data nid = node.identifier if reg.bounds.contains(task): # task falls within region self.nodes_to_recompute.append(nid) children = self.tree.children(nid) for n in children: # if task in region, task is in one sub-region self.add_task_comp(n, task, comp) need_split = reg.add(task, comp, children == []) # COPY ALL MODE if need_split: self.nodes_to_split.append(nid) def update(self, task, continuous_competence, all_raw_rewards): # add new (task, competence) to regions nodes self.nodes_to_split = [] self.nodes_to_recompute = [] new_split = False root = self.tree.get_node('root') self.add_task_comp(root, task, continuous_competence) #print(self.nodes_to_split) assert len(self.nodes_to_split) <= 1 # split a node if needed need_split = len(self.nodes_to_split) == 1 if need_split: new_split = self.split(self.nodes_to_split[0]) if new_split: self.update_nb += 1 #print(self.update_nb) # update list of regions_bounds all_nodes = self.tree.all_nodes() if len(all_nodes) > self.maxregions: # too many regions, lets merge one of them self.merge(all_nodes) all_nodes = self.tree.all_nodes() self.regions_bounds = [n.data.bounds for n in all_nodes] # recompute interests of touched nodes #print(self.nodes_to_recompute) for nid in self.nodes_to_recompute: #print(nid) node = self.tree.get_node(nid) reg = node.data reg.interest 
= self.compute_interest(reg.cps_gs) # collect new interests and new [comp, tasks] lists all_nodes = self.tree.all_nodes() self.interest = [] self.cps_gs = [] for n in all_nodes: self.interest.append(n.data.interest) self.cps_gs.append(n.data.cps_gs) # bk-keeping self.all_boxes.append(copy.copy(self.regions_bounds)) self.all_interests.append(copy.copy(self.interest)) self.split_iterations.append(self.update_nb) assert len(self.interest) == len(self.regions_bounds) return new_split, None def draw_random_task(self): return self.regions_bounds[0].sample() # first region is root region def sample_task(self, args): mode = np.random.rand() if mode < 0.1: # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region if len(self.sampled_tasks) == 0: self.sampled_tasks.append(self.draw_random_task()) else: region_id = proportional_choice(self.interest, eps=0.0) worst_task_idx = np.argmin(self.cps_gs[region_id][0]) # mutate task by a small amount (i.e a gaussian scaled to the regions range) task = np.random.normal(self.cps_gs[region_id][1][worst_task_idx].copy(), 0.1) # clip to stay within region (add small epsilon to avoid falling in multiple regions) task = np.clip(task, self.regions_bounds[region_id].low + 1e-5, self.regions_bounds[region_id].high - 1e-5) self.sampled_tasks.append(task) elif mode < 0.3: # "mode 2" (20%) -> random task self.sampled_tasks.append(self.draw_random_task()) else: # "mode 1" (70%) -> sampling on regions and then random task in selected region region_id = proportional_choice(self.interest, eps=0.0) self.sampled_tasks.append(self.regions_bounds[region_id].sample()) # # sample region # if np.random.rand() < 0.2: # region_id = np.random.choice(range(self.nb_regions)) # else: # region_id = np.random.choice(range(self.nb_regions), p=np.array(self.probas)) # # sample task # self.sampled_tasks.append(self.regions_bounds[region_id].sample()) # # return self.sampled_tasks[-1].tolist() # sample region # region_id = proportional_choice(self.interest, eps=0.2) # # sample task # self.sampled_tasks.append(self.regions_bounds[region_id].sample()) return self.sampled_tasks[-1] def dump(self, dump_dict): dump_dict['all_boxes'] = self.all_boxes dump_dict['split_iterations'] = self.split_iterations dump_dict['all_interests'] = self.all_interests return dump_dict @property def nb_regions(self): return len(self.regions_bounds) @property def get_regions(self): return self.regions_bounds
json_data = json.load(load_f)

# build the propagation tree for the news event
tree = Tree()
tree.create_node(tag=json_data[0].get("mid"),
                 identifier=json_data[0].get("mid"),
                 data=Info(json_data[0], param=10))
uu = 1000 if len(json_data) >= 1000 else len(json_data)
# print('uu:', uu)
for j in range(1, uu):
    try:
        tree.create_node(tag=json_data[j].get("mid"),
                         identifier=json_data[j].get("mid"),
                         parent=json_data[j].get("parent"),
                         data=Info(json_data[j]))
    except:
        pass
# a single propagation tree has been built
# simplify the propagation tree
print('before simplification: tree_depth:', tree.depth(), 'tree_nodes:', len(tree.all_nodes()))
tree = simplify_tree(tree)
print('after simplification: tree_depth:', tree.depth(), 'tree_nodes:', len(tree.all_nodes()))
trees.append(tree)
# print(eid, "---trees simplified")

# propagation trees for all news events have been built
# 3-fold cross-validation
pd_data = pd.read_csv('id_label.txt', sep='\t', header=None)
# wb_data = pd_data.as_matrix()[0:20,]
def unexpanded_nodes(game: treelib.Tree):
    return sum([int(not x.data[2]) for x in game.all_nodes()])
class PrePruneTree: def __init__(self, vehsInfo): self.tree = Tree() self.root = self.tree.create_node("Root", "root") # root node self.vehsInfo = vehsInfo self.vehList = list(vehsInfo.keys()) self.pruneList = None @tail_call_optimized def _build(self, currentNode, vehList): ''' :param vehList: A dict, keys is the set of vehicles, value is a tuple which represents (lane, position) :param currentNode: The current node in the tree ''' s = [currentNode.tag.find(vid) for vid in vehList] # the quit condition in recursion if (np.array(s) >= 0).all() or self._testIllegel(currentNode.tag): return for vehId in vehList: if vehId not in currentNode.tag: if currentNode.is_root: prefix = currentNode.tag.replace("Root", "") else: prefix = currentNode.tag self.tree.create_node(prefix + vehId + "-", prefix + vehId, parent=currentNode) # self.show() for node in self.tree.all_nodes(): if node.is_leaf() and not self._testPrePrune(node.tag): self._build(currentNode=node, vehList=vehList) def _testIllegel(self, tag): ''' test whether need to stop recursion :param tag: Node tag :return: boolean (if true, the recursion will stop) ''' # upToRight.1-leftToBelow.18-belowToRight.2-belowToRight.3- flag = False tmp = tag.split("-") try: tmp.remove('') except: pass if len(tmp) < len(list(self.vehsInfo.keys())) - 1: return flag surplusVeh = list(set(self.vehList) - set(tmp)) for veh1 in surplusVeh: for veh2 in list(set(self.vehsInfo.keys()) - set(tmp)): for subList in self.pruneList: if surplusVeh in subList and veh2 in subList: if subList.index(veh2) > subList.index(veh1): flag = True return flag def _testPrePrune(self, tag): ''' test whether need to preprune :param tag: :return: ''' flag = False vehs = tag.split("-") try: vehs.remove('') except: pass for veh1 in vehs: for veh2 in list(set(self.vehsInfo.keys()) - set(vehs)): for subList in self.pruneList: if veh1 in subList and veh2 in subList: if subList.index(veh2) < subList.index(veh1): flag = True return flag def obtainPruneList(self): laneId = [value[0] for value in self.vehsInfo.values()] sortedList = [] for i in list(set(laneId)): lane_info = {k: v[1] for k, v in self.vehsInfo.items() if v[0] == i} # Vehicles in front are at the front of the lane sortedList.append([vid[0] for vid in sorted(lane_info.items(), key=itemgetter(1), reverse=True)]) pruneList = [sublist for sublist in sortedList if len(sublist) > 1] return pruneList def build(self): threading.stack_size(20000000) self.pruneList = self.obtainPruneList() # start a new thread to generate tree thread = threading.Thread(target=self._build(self.root, self.vehList)) thread.start() def show(self): self.tree.show() def _leaves(self): ''' :return: All the plan for vehicle passing currently. ''' all_nodes = self.tree.all_nodes() return [node for node in all_nodes if node.is_leaf()] def legal_orders(self): leaves = self._leaves() orders = [] for pattern in leaves: # upToRight.1-leftToBelow.18-belowToRight.2-belowToRight.3- tmp = pattern.tag.split("-") try: tmp.remove('') except: pass if len(tmp) == self.tree.depth(): orders.append(tmp) return orders
def unexpanded_night_nodes(game: treelib.Tree):
    return sum([
        int((not x.data[2]) and x.data[1].time == 1)
        for x in game.all_nodes()
    ])
class TreeT(object): def __init__(self, max_id=0): self.tree = Tree() def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None): # starts by ['(', 'pos'] pos_tag = line[1] if parent_id is None: pos_id = 0 else: pos_id = max_id max_id += 1 self.tree.create_node(pos_tag, pos_id, parent_id, TreeData()) parent_id = pos_id total_offset = 2 if line[2] != '(': # sub-tree is leaf # line[0:3] = ['(', 'pos', 'word', ')'] word_tag = line[2] self.tree.create_node(word_tag, leaf_id, parent_id, TreeData()) return 4, max_id, leaf_id + 1 line = line[2:] while line[0] != ')': offset, max_id, leaf_id = self.from_ptb_to_tree( line, max_id, leaf_id, parent_id) total_offset += offset line = line[offset:] return total_offset + 1, max_id, leaf_id def add_height(self, tree_dep): for n in self.tree.all_nodes(): n.data.leaves = [] for leaf in self.tree.leaves(): lid = leaf.identifier hid = tree_dep[lid] if hid == self.tree.root: self.tree[lid].data.height = self.tree.depth(self.tree[lid]) for cid in [ p for p in self.tree.paths_to_leaves() if lid in p ][0]: self.tree[cid].data.leaves += [lid] else: height = -1 cid = lid cond = True while cond: self.tree[cid].data.leaves += [lid] height += 1 cid = self.tree.parent(cid).identifier cid_leaves = [l.identifier for l in self.tree.leaves(cid)] cid_l_dep = [tree_dep[l] for l in cid_leaves if l != lid] cond = set(cid_l_dep).issubset(set(cid_leaves)) self.tree[lid].data.height = height x_nodes = [ n.identifier for n in self.tree.all_nodes() if n.data.leaves == [] ] for x_node in x_nodes[::-1]: min_id = min(self.tree.children(x_node), key=lambda c: c.data.height) _lid = min_id.data.leaves[0] self.tree[_lid].data.height += 1 self.tree[x_node].data.leaves += [_lid] return True def _from_tree_to_ptb(self, nid): nid = self.tree.subtree(nid).root if self.tree[nid].is_leaf(): return ' (' + self.tree[nid].tag + ' ' + self.tree[ nid].data.word + ')' res = ' (' + self.tree[nid].tag for c_nid in sorted(self.tree.children(nid), key=lambda x: x.identifier): res += self._from_tree_to_ptb(c_nid.identifier) return res + ')' def from_tree_to_ptb(self): return self._from_tree_to_ptb(self.tree.root) def from_tag_to_tree(self, tag, word, pos_id=0): parent_id = None for tag_nodes in tag: if tag_nodes[0] in [CL, CR]: c_side = tag_nodes[0] _tag_nodes = tag_nodes[1:] if len(tag_nodes) > 1 else [''] else: c_side = '' _tag_nodes = tag_nodes self.tree.create_node(_tag_nodes[0], pos_id, parent=parent_id, data=TreeData(comb_side=c_side)) parent_id = pos_id pos_id += 1 for tag_node in _tag_nodes[1:]: self.tree.create_node(tag_node[1:], pos_id, parent=parent_id, data=TreeData(miss_side=tag_node[0])) pos_id += 1 for l in self.tree.leaves(): if l.data.miss_side == '': l.data.word = word break return pos_id @memoize def is_combine_to(self, side): return self.tree[self.tree.root].data.comb_side == side @memoize def is_combine_right(self): return self.is_combine_to(CR) @memoize def is_combine_left(self): return self.is_combine_to(CL) @memoize def is_complete_tree(self): return all([n.data.miss_side == '' for n in self.tree.all_nodes()]) @memoize def get_missing_leaves_to(self, miss_val, side): return [ l.identifier for l in self.tree.leaves(self.tree.root) if l.data.miss_side == side and l.tag == miss_val ] @memoize def get_missing_leaves_left(self, miss_val): return self.get_missing_leaves_to(miss_val, L) @memoize def get_missing_leaves_right(self, miss_val): return self.get_missing_leaves_to(miss_val, R) @memoize def root_tag(self): return self.tree[self.tree.root].tag @memoize def 
is_no_missing_leaves(self): return all( [l.data.miss_side == '' for l in self.tree.leaves(self.tree.root)]) @memoize def combine_tree(self, _tree, comb_leaf): self.tree.paste(comb_leaf, _tree.tree) self.tree.link_past_node(comb_leaf) return self def tree_to_path(self, nid, path): # Stop condition if self.tree[nid].is_leaf(): path[nid] = [] return nid, self.tree[nid].data.height # Recursion flag = CR for child in self.tree.children(nid): cid = child.identifier leaf_id, height = self.tree_to_path(cid, path) if (height == 0): # Reached end of path can add flag path[leaf_id].insert(0, flag) # path[leaf_id].append(flag) if height > 0: path[leaf_id].insert(0, nid) # only single child will have height>0 # and its value will be the one that is returned # to the parent ret_leaf_id, ret_height = leaf_id, height - 1 # once we reached a height>0, it means that # this path includes the parent, and thus flag # direction should flip flag = CL return ret_leaf_id, ret_height def path_to_tags(self, path): tags = [] for p in path: _res = [] _p = copy.copy(p) if _p[0] in [CL, CR]: _res.append(_p[0]) _p = _p[1:] while _p[:-1]: el_p = _p.pop(0) _res.append(self.tree[el_p].tag) for c in self.tree.children(el_p): if c.identifier != _p[0]: _res.append(R + c.tag if c.identifier > _p[0] else L + c.tag) _res.append(self.tree[_p[0]].tag) tags.append(_res) return tags def path_to_words(self, path): return [self.tree[k].tag for k in path] def from_tree_to_tag(self): path = {} self.tree_to_path(self.tree.root, path) return { 'tags': self.path_to_tags(path.values()), 'words': self.path_to_words(path.keys()) } def from_ptb_to_tag(self, line, max_id, depend): self.from_ptb_to_tree(line, max_id) self.add_height(depend) path = {} self.tree_to_path(self.tree.root, path) return self.path_to_tags(path.values())
def level_size(tree: treelib.Tree, level=1):
    return len(
        [x for x in tree.all_nodes() if tree.level(x.identifier) == level])
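# Quick usage sketch for level_size: counts the nodes sitting at a given depth.
import treelib

t = treelib.Tree()
t.create_node("root", "r")
t.create_node("a", "a", parent="r")
t.create_node("b", "b", parent="r")
t.create_node("c", "c", parent="a")

print(level_size(t, level=1))  # 2 -> nodes "a" and "b"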
from treelib import Tree

tree = Tree()
tree.create_node("a", "a", data={"v": 0})
tree.create_node("b", "b", data={"v": 7}, parent="a")
tree.create_node("c", "c", data={"v": 4}, parent="b")
tree.create_node("d", "d", data={"v": 3}, parent="b")
tree.create_node("f", "f", data={"v": 0}, parent="b")
tree.create_node("e", "e", data={"v": 3}, parent="a")
print(tree)


def func_1(node):
    v = 0
    children = tree.children(node.identifier)
    for child in children:
        if child.data["v"] == 0 and len(tree.children(child.identifier)) != 0:
            child.data["v"] = func_1(child)
        v += child.data["v"]
    return v


for node in tree.all_nodes():
    if node.data["v"] == 0 and len(tree.children(node.identifier)) != 0:
        node.data["v"] = func_1(node)

for node in tree.all_nodes():
    print("node: " + node.tag + " value: {}".format(node.data["v"]))
import re

import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from sklearn.feature_selection import mutual_info_classif
from treelib import Tree

# FeatureTransformChoice is the project's own base class and is assumed to be
# importable from the surrounding package.


class HFE(FeatureTransformChoice):
    def __init__(self, dataset_shape, taxonomy=None):
        self.dataset_shape = dataset_shape
        self.taxonomy = taxonomy

    def fit(self, hyperparameters, X, y):
        # Initialize new tree
        self.tree = Tree()
        if self.taxonomy is None:
            print('Warning: Could not execute HFE algorithm, no taxonomy provided')
        else:
            # Add OTUs to the internal tree structure
            for i, otu in enumerate(
                    [a for a in X.columns.values if a.lower().startswith('otu')]):
                self._add_otu_to_tree(self.tree, otu, X.copy())
            # Perform filtering, then determine which nodes in the tree will act as the final features
            self._correlation_filter()
            self._path_ig_filter(y)
            self._leaf_ig_filter(y)
            self.valid_ids = [
                x.identifier for x in self.tree.all_nodes() if x.data['valid']
            ]

    def transform(self, X, y=None):
        # Build a new tree (Note: self.tree is built on training data, so we need a new tree for unseen data)
        new_tree = Tree()
        for i, otu in enumerate(
                [a for a in X.columns.values if a.lower().startswith('otu')]):
            self._add_otu_to_tree(new_tree, otu, X.copy())
        # Extract the final features and store them in a dataframe
        result = pd.DataFrame()
        for i, current_id in enumerate(self.valid_ids):
            result[i] = new_tree[current_id].data['feature_vector']
        return result

    def get_name(self):
        return 'HFE'

    def hyperparameter_grid(self):
        return None

    # Description: Populates the tree structure
    def _add_otu_to_tree(self, tree, otu_name, otu_table):
        raw_string = self.taxonomy['Taxonomy'][otu_name.lower() == np.array(
            [x.lower() for x in self.taxonomy['OTU']])].values[0]
        taxonomic_string = re.sub(r'[()0-9]', '', raw_string)
        taxonomic_levels = taxonomic_string.split(';')
        feature_vector = otu_table.loc[:, otu_name].values.copy()
        for i, level in enumerate(taxonomic_levels):
            # Some levels might be empty if the taxonomy file has an extra ';'
            # or two back to back. These should be skipped.
            if level == '':
                continue
            if tree.contains(level):
                # Increment the node's OTU vector with the argument OTU vector (i.e. node vector + new OTU vector)
                tree[level].data['feature_vector'] += feature_vector.copy()
            else:
                # Create a new node holding the OTU vector and a boolean flag indicating a valid node
                tree.create_node(
                    tag=level,
                    identifier=level,
                    parent=taxonomic_levels[i - 1] if i != 0 else None,
                    data={
                        'feature_vector': feature_vector.copy(),
                        'valid': True
                    })

    # Description: Removes all child nodes whose feature vector is correlated with their parent node
    # by switching the valid flag for that node to False
    def _correlation_filter(self, threshold=0.80):
        paths = self.tree.paths_to_leaves()
        for path in paths:
            if len(path) < 1:
                continue
            for i in range(1, len(path)):
                parent_feature_vector = self.tree[path[-1 - i + 1]].data['feature_vector']
                child_feature_vector = self.tree[path[-1 - i]].data['feature_vector']
                current_correlation = pearsonr(parent_feature_vector,
                                               child_feature_vector)[0]
                if current_correlation > threshold:
                    self.tree[path[-1 - i]].data['valid'] = False

    # Description: Filters nodes based on average path information gain (IG)
    def _path_ig_filter(self, labels):
        paths = self.tree.paths_to_leaves()
        for path in paths:
            added = 0
            avg_path_IG = None
            for i, bacteria in enumerate(path):
                # For those nodes that passed the correlation filter, compute the running IG average
                if self.tree[bacteria].data['valid']:
                    if added == 0:
                        avg_path_IG = mutual_info_classif(
                            X=self.tree[bacteria].data['feature_vector'].reshape(len(labels), 1),
                            y=labels,
                            random_state=0)
                        added += 1
                    else:
                        avg_path_IG = avg_path_IG * (added / (added + 1)) + (
                            mutual_info_classif(
                                X=self.tree[bacteria].data['feature_vector'].reshape(len(labels), 1),
                                y=labels,
                                random_state=0) / (added + 1))
                        added += 1
            # If a node in the path is below the average, or it is uninformative (i.e. all zeros), remove the node
            for bacteria in path:
                current_bacteria_IG = mutual_info_classif(
                    X=self.tree[bacteria].data['feature_vector'].reshape(len(labels), 1),
                    y=labels,
                    random_state=0)
                if (avg_path_IG is None) or (current_bacteria_IG < avg_path_IG) or sum(
                        self.tree[bacteria].data['feature_vector']) == 0:
                    self.tree[bacteria].data['valid'] = False

    # Description: Filters leaf nodes in incomplete paths based on global information gain (IG)
    def _leaf_ig_filter(self, labels):
        # Returns whether a path is incomplete
        def incomplete_path(path):
            incomplete = False
            for bacteria in path:
                if not self.tree[bacteria].data['valid']:
                    incomplete = True
            return incomplete

        paths = self.tree.paths_to_leaves()
        avg_tree_IG = None
        added = 0
        # Compute the global average IG
        for path in paths:
            for bacteria in path:
                # Only the remaining nodes contribute to the global average IG
                if self.tree[bacteria].data['valid']:
                    if added == 0:
                        avg_tree_IG = mutual_info_classif(
                            X=self.tree[bacteria].data['feature_vector'].reshape(len(labels), 1),
                            y=labels,
                            random_state=0)
                        added += 1
                    else:
                        avg_tree_IG = avg_tree_IG * (added / (added + 1)) + (
                            mutual_info_classif(
                                X=self.tree[bacteria].data['feature_vector'].reshape(len(labels), 1),
                                y=labels,
                                random_state=0) / (added + 1))
                        added += 1
        # The leaf node is the final element in the path. A leaf node belonging to an
        # incomplete path is removed if its IG is zero or below the global average IG.
        for path in paths:
            leaf_node_IG = mutual_info_classif(
                X=self.tree[path[-1]].data['feature_vector'].reshape(len(labels), 1),
                y=labels,
                random_state=0)
            if incomplete_path(path) and ((leaf_node_IG == 0) or (leaf_node_IG < avg_tree_IG)):
                self.tree[path[-1]].data['valid'] = False
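The snippet below is a minimal usage sketch for the HFE transformer above, not part of the original code. It assumes X_train / X_test are pandas DataFrames whose OTU columns start with "otu", y_train is the label vector, and taxonomy is a DataFrame with 'OTU' and 'Taxonomy' columns holding semicolon-separated lineages, as _add_otu_to_tree expects; the variable names are illustrative only.

# Hypothetical driver for HFE; X_train, X_test, y_train and taxonomy are
# assumed to exist with the shapes/columns described above.
hfe = HFE(dataset_shape=X_train.shape, taxonomy=taxonomy)
hfe.fit(hyperparameters=None, X=X_train, y=y_train)  # builds the taxonomy tree and selects valid node ids
train_features = hfe.transform(X_train)              # aggregated feature vectors for the selected nodes
test_features = hfe.transform(X_test)                # same valid_ids applied to unseen samples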
from treelib import Node, Tree

# Policy is the project's own base class (providing self.T, self.Q, self.S, self.A)
# and is assumed to be importable from the surrounding package.


class DMNSPolicy(Policy):
    """
    Representation of a deterministic markovian non-stationary policy.

    Deterministic markovian means that the act method returns only an action,
    and that the relevant history consists only of the current state.

    Attributes
    ----------
    policy: dict
        A dictionary with the deterministic markovian policy.
    tree: treelib.Tree
        A tree object for visualization purposes.
    """

    def __init__(self, space):
        super().__init__(space)

    def __repr__(self):
        st = str(self.tree.show())
        return st

    def _create_tree(self, initial_state):
        """
        A tree object for visualization purposes is created.

        Parameters
        ----------
        initial_state: state
            An initial state
        """
        self.tree = Tree()
        self.tree.add_node(
            Node(f'({0}:{initial_state}:{self.policy[(0, initial_state)]})',
                 f'({0}:{initial_state}:{self.policy[(0, initial_state)]})'))

        def add_sons(s, t):
            a = self.policy[(t, s)]
            if t == self.T:
                for st in self.Q(s, a):
                    n = Node(f'({t + 1}:{st})', f'({t + 1}:{st})')
                    self.tree.add_node(node=n, parent=f'({t}:{s}:{a})')
            elif t < self.T - 1:
                for st in self.Q(s, a):
                    at = self.policy[(t + 1, st)]
                    n = Node(f'({t + 1}:{st}:{at})', f'({t + 1}:{st}:{at})')
                    if n.identifier not in map(lambda x: x.identifier,
                                               self.tree.all_nodes()):
                        self.tree.add_node(node=n, parent=f'({t}:{s}:{a})')
                        add_sons(st, t + 1)

        add_sons(initial_state, 0)

    def act(self, history):
        """
        Return the action prescribed by the policy for the given history.

        Parameters
        ----------
        history: tuple
            A (time, state) pair.

        Returns
        -------
        The action stored in the policy for that (time, state) pair.
        """
        time, state = history
        return self.policy[(time, state)]

    def add_action(self, time, state, action):
        """
        Add an action to the policy.

        Parameters
        ----------
        time: int
            Time
        state: State
            State
        action: Action
            Action
        """
        # assert state in self.S and action in self.A
        self.policy[time, state] = action

    def add_policy(self, policy, initial_state=None):
        """
        Adds a complete policy.

        Parameters
        ----------
        policy: dict
            Policy to add
        initial_state: state
            (Optional) If the initial state is given, a tree for visualization purposes is created.
        """
        self.policy = policy
        if initial_state is not None:
            self._create_tree(initial_state)
def tree_build_from_dict(containers=None):
    """
    Build a tree of containers based on an unsorted dict of containers.

    Example:
    --------
    >>> containers = {'Fabric': {'parent_container': 'Tenant'},
                      'Leaves': {'configlets': ['container_configlet'],
                                 'devices': ['veos01'],
                                 'images': ['4.22.0F'],
                                 'parent_container': 'Fabric'},
                      'MLAG01': {'configlets': ['container_configlet'],
                                 'devices': ['veos01'],
                                 'images': ['4.22.0F'],
                                 'parent_container': 'Leaves'},
                      'MLAG02': {'configlets': ['container_configlet'],
                                 'devices': ['veos01'],
                                 'images': ['4.22.0F'],
                                 'parent_container': 'Leaves'},
                      'Spines': {'configlets': ['container_configlet'],
                                 'devices': ['veos01'],
                                 'images': ['4.22.0F'],
                                 'parent_container': 'Fabric'}}
    >>> print(tree_build_from_dict(containers=containers))
    {"Tenant": {"children": [{"Fabric": {"children": [{"Leaves": {"children": ["MLAG01", "MLAG02"]}}, "Spines"]}}]}}

    Parameters
    ----------
    containers : dict, optional
        Container topology to create on CVP, by default None

    Returns
    -------
    json
        tree topology
    """
    # Create tree object
    tree = Tree()
    # Track containers already inserted in the tree
    previously_created = list()
    # Create root node to mimic CVP behavior
    tree.create_node("Tenant", "Tenant")
    # Iterate over the first level of containers directly attached under root.
    for container_name, container_info in containers.items():
        if container_info['parent_container'] in ['Tenant']:
            previously_created.append(container_name)
            tree.create_node(container_name,
                             container_name,
                             parent=container_info['parent_container'])
    # Loop as long as the tree holds fewer nodes than the container topology (plus root)
    while len(tree.all_nodes()) < len(containers) + 1:
        for container_name, container_info in containers.items():
            if tree.contains(container_info['parent_container']) and \
                    container_info['parent_container'] not in ['Tenant']:
                try:
                    tree.create_node(container_name,
                                     container_name,
                                     parent=container_info['parent_container'])
                except:  # noqa E722
                    continue
    return tree.to_json()
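Since tree_build_from_dict returns treelib's JSON string rather than a Tree object, a caller that wants to inspect the topology can parse it back with the standard json module. A small sketch, reusing the containers dict from the docstring example above:

import json

# `containers` is the dict from the docstring example; the assertion reflects
# the single "Tenant" root created to mimic CVP.
topology = json.loads(tree_build_from_dict(containers=containers))
assert list(topology.keys()) == ["Tenant"]
print(topology["Tenant"]["children"])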
class RIAC(AbstractTeacher):
    def __init__(self,
                 mins,
                 maxs,
                 seed,
                 env_reward_lb,
                 env_reward_ub,
                 max_region_size=200,
                 alp_window_size=None,
                 nb_split_attempts=50,
                 sampling_in_leaves_only=False,
                 min_region_size=None,
                 min_dims_range_ratio=1 / 6,
                 discard_ratio=1 / 4):
        AbstractTeacher.__init__(self, mins, maxs, env_reward_lb, env_reward_ub, seed)

        # Maximal number of (task, reward) pairs a region can hold before splitting
        self.maxlen = max_region_size

        self.alp_window = self.maxlen if alp_window_size is None else alp_window_size

        # Initialize Regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(self.mins, self.maxs, dtype=np.float32)]
        self.regions_alp = [0.]
        self.tree.create_node('root',
                              'root',
                              data=Region(maxlen=self.maxlen,
                                          r_t_pairs=[
                                              deque(maxlen=self.maxlen + 1),
                                              deque(maxlen=self.maxlen + 1)
                                          ],
                                          bounds=self.regions_bounds[-1],
                                          alp=self.regions_alp[-1]))
        self.nb_dims = len(mins)
        self.nb_split_attempts = nb_split_attempts

        # Whether task sampling uses parent and child regions (False) or only child regions (True)
        self.sampling_in_leaves_only = sampling_in_leaves_only

        # Additional tricks to original RIAC, enforcing splitting rules

        # 1 - Minimum population required for both children when splitting --> set to 1 to cancel
        self.minlen = self.maxlen / 20 if min_region_size is None else min_region_size

        # 2 - minimum children region size (compared to initial range of each dimension)
        # Set min_dims_range_ratio to 1/np.inf to cancel
        self.dims_ranges = self.maxs - self.mins
        self.min_dims_range_ratio = min_dims_range_ratio

        # 3 - If after nb_split_attempts, no split is valid, flush oldest points of parent region
        # If 1- and 2- are canceled, this will be canceled since any split will be valid
        self.discard_ratio = discard_ratio

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_alps = []
        self.update_nb = -1
        self.split_iterations = []

        self.hyperparams = locals()

    def compute_alp(self, sub_region):
        if len(sub_region[0]) > 2:
            cp_window = min(len(sub_region[0]), self.alp_window)  # not completely window
            half = int(cp_window / 2)
            # print(str(cp_window) + 'and' + str(half))
            first_half = np.array(sub_region[0])[-cp_window:-half]
            snd_half = np.array(sub_region[0])[-half:]
            diff = first_half.mean() - snd_half.mean()
            cp = np.abs(diff)
        else:
            cp = 0
        alp = np.abs(cp)
        return alp

    def split(self, nid):
        # Try nb_split_attempts splits on region corresponding to node <nid>
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_bounds = None
        best_sub_regions = None
        is_split = False
        for i in range(self.nb_split_attempts):
            sub_reg1 = [
                deque(maxlen=self.maxlen + 1),
                deque(maxlen=self.maxlen + 1)
            ]
            sub_reg2 = [
                deque(maxlen=self.maxlen + 1),
                deque(maxlen=self.maxlen + 1)
            ]

            # repeat until the two sub regions contain at least minlen of the mother region
            while len(sub_reg1[0]) < self.minlen or len(sub_reg2[0]) < self.minlen:
                # decide on dimension
                dim = self.random_state.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]
                valid_bounds = True

                if np.any(bounds1.high - bounds1.low < self.dims_ranges * self.min_dims_range_ratio):
                    valid_bounds = False
                if np.any(bounds2.high - bounds2.low < self.dims_ranges * self.min_dims_range_ratio):
                    valid_bounds = valid_bounds and False

                # perform split in sub regions
                sub_reg1 = [
                    deque(maxlen=self.maxlen + 1),
                    deque(maxlen=self.maxlen + 1)
                ]
                sub_reg2 = [
                    deque(maxlen=self.maxlen + 1),
                    deque(maxlen=self.maxlen + 1)
                ]
                for i, task in enumerate(reg.r_t_pairs[1]):
                    if bounds1.contains(task):
                        sub_reg1[1].append(task)
                        sub_reg1[0].append(reg.r_t_pairs[0][i])
                    else:
                        sub_reg2[1].append(task)
                        sub_reg2[0].append(reg.r_t_pairs[0][i])
                sub_regions = [sub_reg1, sub_reg2]

            # compute alp
            alp = [self.compute_alp(sub_reg1), self.compute_alp(sub_reg2)]

            # compute score
            split_score = len(sub_reg1) * len(sub_reg2) * np.abs(alp[0] - alp[1])
            if split_score >= best_split_score and valid_bounds:
                is_split = True
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds

        if is_split:
            # add new nodes to tree
            for i, (r_t_pairs, bounds) in enumerate(zip(best_sub_regions, best_bounds)):
                self.tree.create_node(identifier=self.tree.size(),
                                      parent=nid,
                                      data=Region(self.maxlen,
                                                  r_t_pairs=r_t_pairs,
                                                  bounds=bounds,
                                                  alp=alp[i]))
        else:
            assert len(reg.r_t_pairs[0]) == (self.maxlen + 1)
            reg.r_t_pairs[0] = deque(
                islice(reg.r_t_pairs[0], int(self.maxlen * self.discard_ratio), self.maxlen + 1))
            reg.r_t_pairs[1] = deque(
                islice(reg.r_t_pairs[1], int(self.maxlen * self.discard_ratio), self.maxlen + 1))

        return is_split

    def add_task_reward(self, node, task, reward):
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task):  # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children:  # if task in region, task is in one sub-region
                self.add_task_reward(n, task, reward)

            need_split = reg.add(task, reward, children == [])  # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)

    def episodic_update(self, task, reward, is_success):
        self.update_nb += 1

        # Add new (task, reward) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_reward(root, task, reward)  # Will update self.nodes_to_split if needed
        assert len(self.nodes_to_split) <= 1

        # Split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])  # Execute the split
            if new_split:
                # Update list of regions_bounds
                if self.sampling_in_leaves_only:
                    self.regions_bounds = [n.data.bounds for n in self.tree.leaves()]
                else:
                    self.regions_bounds = [n.data.bounds for n in self.tree.all_nodes()]

        # Recompute ALPs of modified nodes
        for nid in self.nodes_to_recompute:
            node = self.tree.get_node(nid)
            reg = node.data
            reg.alp = self.compute_alp(reg.r_t_pairs)

        # Collect regions data (regions' ALP and regions' (task, reward) pairs)
        all_nodes = self.tree.all_nodes() if not self.sampling_in_leaves_only else self.tree.leaves()
        self.regions_alp = []
        self.r_t_pairs = []
        for n in all_nodes:
            self.regions_alp.append(n.data.alp)
            self.r_t_pairs.append(n.data.r_t_pairs)

        # Book-keeping
        if new_split:
            self.all_boxes.append(copy.copy(self.regions_bounds))
            self.all_alps.append(copy.copy(self.regions_alp))
            self.split_iterations.append(self.update_nb)
        assert len(self.regions_alp) == len(self.regions_bounds)

        return new_split, None

    def sample_random_task(self):
        return self.regions_bounds[0].sample()  # First region is root region

    def sample_task(self):
        mode = self.random_state.rand()
        if mode < 0.1:  # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                self.sampled_tasks.append(self.sample_random_task())
            else:
                self.sampled_tasks.append(self.non_exploratory_task_sampling()["task"])
        elif mode < 0.3:  # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.sample_random_task())
        else:  # "mode 1" (70%) -> proportional sampling on regions based on ALP and then random task in selected region
            region_id = proportional_choice(self.regions_alp, self.random_state, eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1].astype(np.float32)

    def non_exploratory_task_sampling(self):
        # 1 - Sample region proportionally to its ALP
        region_id = proportional_choice(self.regions_alp, self.random_state, eps=0.0)

        # 2 - Retrieve (task, reward) pair with lowest reward
        worst_task_idx = np.argmin(self.r_t_pairs[region_id][0])

        # 3 - Mutate task by a small amount (using Gaussian centered on task, with 0.1 std)
        task = self.random_state.normal(self.r_t_pairs[region_id][1][worst_task_idx].copy(), 0.1)
        # clip to stay within region (add small epsilon to avoid falling in multiple regions)
        task = np.clip(task,
                       self.regions_bounds[region_id].low + 1e-5,
                       self.regions_bounds[region_id].high - 1e-5)
        return {
            "task": task,
            "infos": {
                "bk_index": len(self.all_boxes) - 1,
                "task_infos": region_id
            }
        }

    def dump(self, dump_dict):
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_alps'] = self.all_alps
        # dump_dict['riac_params'] = self.hyperparams
        return dump_dict

    @property
    def nb_regions(self):
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        return self.regions_bounds
import re
from collections import defaultdict

from treelib import Tree

# `datapath` (location of the WordNet Prolog files) and the inflect engine `p`
# are module-level globals defined elsewhere in the project.


def use_hyp(word2syn, output, data):
    un_change = []
    dic = Tree()
    dic.create_node("100001740", "100001740")
    add = -1
    # Keep re-reading the hypernym file until no new synset can be attached to the tree
    while add != 0:
        add = 0
        f = open(datapath + "wn_hyp.pl", "r")
        while True:
            line = f.readline()
            if not line:
                break
            else:
                l, r = re.findall(r'\d+', line)
                try:
                    dic.create_node(l, l, parent=r)
                    add += 1
                except:
                    pass
        f.close()
    print(dic.size())

    # A synset entails every synset in its subtree
    entail = defaultdict(list)
    for n in dic.all_nodes():
        for m in dic.subtree(n.tag).all_nodes():
            if m.tag != n.tag:
                entail[n.tag].append(m.tag)

    label = set()
    for d in data:
        d0 = d[0]
        d1 = d[1]
        if p.singular_noun(d[0]) != False:
            d0 = p.singular_noun(d[0])
        if p.singular_noun(d[1]) != False:
            d1 = p.singular_noun(d[1])
        for i in word2syn[d0]:
            for j in word2syn[d1]:
                if j in entail[i]:
                    if d[0] + "\t" + ">" + "\t" + d[1] not in output:
                        output += [d[0] + "\t" + ">" + "\t" + d[1]]
                    label.add(d)
                elif i in entail[j]:
                    if d[0] + "\t" + "<" + "\t" + d[1] not in output:
                        output += [d[0] + "\t" + "<" + "\t" + d[1]]
                    label.add(d)
        if d not in un_change and d not in label:
            un_change += [d]
    print("before single: " + str(len(data)) + " after: " + str(len(un_change)))
    output += ["\n"]
    del entail
    data = un_change
    del un_change
    un_change = []

    # Siblings in the hypernym tree (excluding the parent) are treated as alternatives
    alter = defaultdict(list)
    for n in dic.all_nodes():
        for m in dic.siblings(n.tag):
            if m.tag != n.tag and n.bpointer != m.tag:
                alter[n.tag].append(m.tag)

    label = set()
    for d in data:
        d0 = d[0]
        d1 = d[1]
        if p.singular_noun(d[0]) != False:
            d0 = p.singular_noun(d[0])
        if p.singular_noun(d[1]) != False:
            d1 = p.singular_noun(d[1])
        for i in word2syn[d0]:
            for j in word2syn[d1]:
                if j in alter[i]:
                    if d[0] + "\t" + "|" + "\t" + d[1] not in output:
                        output += [d[0] + "\t" + "|" + "\t" + d[1]]
                    label.add(d)
                elif i in alter[j]:
                    if d[0] + "\t" + "|" + "\t" + d[1] not in output:
                        output += [d[0] + "\t" + "|" + "\t" + d[1]]
                    label.add(d)
        if d not in un_change and d not in label:
            un_change += [d]
    del alter
    print("before single: " + str(len(data)) + " after: " + str(len(un_change)))
    output += ["\n"]
    return output, un_change