def optimized_structural_coding_tree(G):
    """
    Greedily search for a coding tree of graph G with low 2-dimensional
    structural entropy (structural information).

    The search starts from ``init_codetree(G)``. In every round only the
    children of the root are considered, pairwise:

    * one non-leaf child and one leaf child -> candidate from ``merge``,
      scored with ``merge_delta``;
    * two leaf children -> candidate from ``combine``, scored with
      ``combine_delta``.

    The candidate with the strictly largest positive delta replaces the
    current tree. The loop ends when neither kind of move wins (including
    the case where the best merge and best combine deltas are equal) or when
    the tree depth exceeds the number of nodes of G.

    param G: graph
    return: the coding tree reached by the search (the partition of G that
            the search settled on)

    note: this is a modification of the algorithm defined by Angsheng Li and
    Yicheng Pan in [1], definition 6, and implemented by Li et al. in [2].

    [1] Angsheng Li, Yicheng Pan: Structural Information and Dynamical
        Complexity of Networks. IEEE Trans. Information Theory 62(6):
        3290-3339 (2016)
    [2] Angsheng L, Xianchen Y, Bingxiang X, et al. Decoding topologically
        associating domains with ultra-low resolution Hi-C data by graph
        structural entropy. Nature Communications, 2018, 9(1):3265-.
    """
    codetree = init_codetree(G)
    while True:
        if codetree.depth() > nx.number_of_nodes(G):
            break
        root_children = codetree.get_node(codetree.root).fpointer

        # --- best merge between a non-leaf child and a leaf child ---------
        best_merge_gain = 0
        for position, first_id in enumerate(root_children):
            for second_id in root_children[position + 1:]:
                first_is_leaf = codetree.get_node(first_id).is_leaf()
                second_is_leaf = codetree.get_node(second_id).is_leaf()
                if first_is_leaf == second_is_leaf:
                    # merge only pairs exactly one leaf with one non-leaf
                    continue
                if first_is_leaf:
                    branch_id, leaf_id = second_id, first_id
                else:
                    branch_id, leaf_id = first_id, second_id
                candidate = merge(codetree, branch_id, leaf_id)
                gain = merge_delta(G, codetree, candidate, branch_id, leaf_id)
                if gain > best_merge_gain:
                    best_merge_gain = gain
                    merged_tree = Tree(candidate.subtree(candidate.root),
                                       deep=True)

        # --- best combination of two leaf children ------------------------
        best_combine_gain = 0
        for position, first_id in enumerate(root_children):
            for second_id in root_children[position + 1:]:
                both_leaves = (codetree.get_node(first_id).is_leaf()
                               and codetree.get_node(second_id).is_leaf())
                if not both_leaves:
                    continue
                candidate = combine(codetree, first_id, second_id)
                gain = combine_delta(G, codetree, candidate, first_id,
                                     second_id)
                if gain > best_combine_gain:
                    best_combine_gain = gain
                    combined_tree = Tree(candidate.subtree(candidate.root),
                                         deep=True)

        # --- apply the strictly better move, or stop ----------------------
        if best_merge_gain > 0 and best_merge_gain > best_combine_gain:
            winner = merged_tree
        elif best_combine_gain > 0 and best_combine_gain > best_merge_gain:
            winner = combined_tree
        else:
            break
        codetree = Tree(winner.subtree(winner.root), deep=True)
    return codetree
def optimal_graph_coding_tree(G):
    """
    Greedily refine the coding tree of graph G over all sibling pairs.

    Starting from ``init_codetree(G)``, each round walks every node of the
    tree (``expand_tree`` with ``mode=2``) together with its siblings and
    scores two kinds of candidate trees:

    * ``merge`` of a non-leaf node with a leaf sibling (``merge_delta``);
    * ``combine`` of two leaf siblings (``combine_delta``), skipped for
      nodes that have exactly one sibling.

    The candidate with the strictly largest positive delta becomes the new
    tree; the loop stops when neither kind of move wins.

    param G: graph
    return: the coding tree reached by the search
    """
    codetree = init_codetree(G)
    while True:
        # --- best merge of a non-leaf node with a leaf sibling ------------
        best_merge_gain = 0
        for alpha_nid in codetree.expand_tree(mode=2):
            for beta_node in codetree.siblings(alpha_nid):
                alpha_is_leaf = codetree.get_node(alpha_nid).is_leaf()
                beta_is_leaf = beta_node.is_leaf()
                if alpha_is_leaf == beta_is_leaf:
                    continue
                if alpha_is_leaf:
                    branch_id, leaf_id = beta_node.identifier, alpha_nid
                else:
                    branch_id, leaf_id = alpha_nid, beta_node.identifier
                candidate = merge(codetree, branch_id, leaf_id)
                gain = merge_delta(G, codetree, candidate, branch_id, leaf_id)
                if gain > best_merge_gain:
                    best_merge_gain = gain
                    merged_tree = Tree(candidate.subtree(candidate.root),
                                       deep=True)

        # --- best combination of two leaf siblings ------------------------
        best_combine_gain = 0
        for alpha_nid in codetree.expand_tree(nid=-1, mode=2):
            for beta_node in codetree.siblings(alpha_nid):
                if len(codetree.siblings(alpha_nid)) == 1:
                    break
                both_leaves = (codetree.get_node(alpha_nid).is_leaf()
                               and beta_node.is_leaf())
                if not both_leaves:
                    continue
                candidate = combine(codetree, alpha_nid, beta_node.identifier)
                gain = combine_delta(G, codetree, candidate, alpha_nid,
                                     beta_node.identifier)
                if gain > best_combine_gain:
                    best_combine_gain = gain
                    combined_tree = Tree(candidate.subtree(candidate.root),
                                         deep=True)

        # --- apply the strictly better move, or stop ----------------------
        if best_merge_gain > 0 and best_merge_gain > best_combine_gain:
            winner = merged_tree
        elif best_combine_gain > 0 and best_combine_gain > best_merge_gain:
            winner = combined_tree
        else:
            break
        codetree = Tree(winner.subtree(winner.root), deep=True)
    return codetree
def traverseLogR(self, tree: Tree, action, builder=None):
    """Apply ``action`` to ``tree`` and then, recursively, to each subtree.

    The traversal is pre-order: the tree itself is visited first, then the
    subtree of every id listed in ``tree._rootNode.fpointer``, in that order.

    Args:
        tree: tree to walk; must expose ``_rootNode.fpointer`` and
            ``subtree(nid)``.
        action: callable invoked as ``action(tree)`` when ``builder`` is
            None, otherwise as ``action(tree, builder)``.
        builder: optional object passed through unchanged to every
            ``action`` call.
    """
    if builder is None:
        action(tree)
    else:
        action(tree, builder)
    for nid in tree._rootNode.fpointer:
        # Recurse through ``self``: calling the bare name would drop the
        # bound instance and shift every argument by one position.
        self.traverseLogR(tree.subtree(nid), action, builder)
def get_data(self, json_file_location, file_name, name):
    """Load a pickled tree from disk and return one of its subtrees as JSON.

    Args:
        json_file_location: directory prefix of the pickle file. It is
            concatenated with ``file_name`` as-is, so it must already end
            with a path separator.
        file_name: name of the pickle file inside that location.
        name: identifier of the node whose subtree is exported.

    Returns:
        The result of ``subtree(name).to_json(with_data=True)`` on the
        unpickled tree.
    """
    # NOTE(security): pickle.load executes arbitrary code embedded in the
    # file. Only call this on files produced by this application.
    directory = os.path.join(json_file_location)
    with open(directory + file_name, 'rb') as config_dictionary_file:
        loaded_tree = pickle.load(config_dictionary_file)
    return loaded_tree.subtree(name).to_json(with_data=True)
def create_oa_tree():
    """Build the OA organisation tree from the ``groupinfo`` table.

    Every row (orgid, shortname, parentorgid) is first attached to a virtual
    root with id '0000'; rows are then moved under their real parent when
    that parent exists in the tree. The database handle is closed even if
    the query or the tree construction fails.

    Returns:
        The subtree rooted at organisation '006953' (used for the
        headquarters integration test).
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    db, cursor = connect_db('localhost', 'root', 'yoyoball', 'np020')
    try:
        oa_tree = Tree()
        sql = "SELECT `orgid`, `shortname`, `parentorgid` FROM groupinfo"
        cursor.execute(sql)
        dept_result = cursor.fetchall()
        if dept_result:
            # Create the virtual root first.
            oa_tree.create_node('##oa_root##', '0000')
            # Attach every organisation directly to the virtual root.
            for org_id, short_name, _parent_id in dept_result:
                oa_tree.create_node(short_name, org_id, '0000')
            # Re-parent every organisation except the root id itself.
            # (In OA, organisations such as '001000' already have '0000' as
            # their parent, i.e. the virtual root exists in the OA data.)
            for org_id, _short_name, parent_id in dept_result:
                # Organisations whose parent is unknown stay where they are.
                if org_id != '0000' and oa_tree.contains(parent_id):
                    oa_tree.move_node(org_id, parent_id)
    finally:
        # Always release the database connection.
        close_db(db)
    return oa_tree.subtree('006953')  # used for the headquarters test
def create_ding_tree():
    """Build the DingTalk department tree from ``dingding_department_list``.

    Every row (id, name, parentid) is first attached to a virtual root with
    id '0'; rows are then moved under their real parent when that parent
    exists in the tree. The database handle is closed even if the query or
    the tree construction fails.

    Returns:
        The subtree rooted at department '1' (the real DingTalk root).
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    db, cursor = connect_db('localhost', 'root', 'yoyoball', 'dingtalk')
    try:
        ding_tree = Tree()
        sql = "SELECT `id`, `name`, `parentid` FROM dingding_department_list"
        cursor.execute(sql)
        dept_result = cursor.fetchall()
        if dept_result:
            # Create the virtual root first.
            ding_tree.create_node('##ding_root##', '0')
            # Attach every department directly to the virtual root.
            for dept_id, dept_name, _parent_id in dept_result:
                ding_tree.create_node(dept_name, dept_id, '0')
            # Re-parent every department except the real root. (In DingTalk
            # the real root has id '1' and no parent; the table stores '0'
            # as the parent of department '1'.)
            for dept_id, _dept_name, parent_id in dept_result:
                # Departments whose parent is unknown stay where they are.
                if dept_id != '1' and ding_tree.contains(parent_id):
                    ding_tree.move_node(dept_id, parent_id)
    finally:
        # Always release the database connection.
        close_db(db)
    return ding_tree.subtree('1')
def configure_tree_topology(self, root, degree=2, remove=False):
    """Configure the cluster's network topology as a tree.

    The tree is rooted at the given node. The remaining nodes are attached
    in the order in which ``self.topology.nodes`` yields them, filling each
    parent with up to ``degree`` children before moving to the next parent
    id. Afterwards every node gets one forwarding entry per other node:
    towards the child whose subtree contains the destination, or towards
    its own parent when the destination is not below it.

    Arguments:
        root {integer} -- The tree's root node.

    Keyword Arguments:
        degree {integer} -- The maximum number of children (default: {2})
        remove {boolean} -- Remove the configuration (default: {False})
    """
    self.logger.info("Configuring tree topology...")

    tree = Tree()
    root_node = self.topology.get_node(root)
    tree.create_node(root_node.name, root_node.node_id)

    # Attach the remaining nodes, advancing the parent id once it is full.
    parent_node = root
    for candidate in self.topology.nodes:
        if candidate.node_id == root_node.node_id:
            continue
        if len(tree.children(parent_node)) >= degree:
            if parent_node == root and root != 0:
                # after the root, restart from id 0
                parent_node = 0
            elif parent_node + 1 == root:
                # skip over the root id, it is already filled
                parent_node += 2
            else:
                parent_node += 1
        tree.create_node(candidate.name, candidate.node_id, parent_node)

    self.logger.info("The following tree will be configured:")
    tree.show()

    # Derive the forwarding table of every node from the tree.
    for source in self.topology.nodes:
        self.logger.debug("%s:", source.name)
        below_source = tree.subtree(source.node_id)
        for target in self.topology.nodes:
            if source.node_id == target.node_id:
                continue
            if below_source.contains(target.node_id):
                # Destination is below us: forward via the first child that
                # is the destination or one of its ancestors.
                next_hop = next(
                    (child for child in tree.children(source.node_id)
                     if child.identifier == target.node_id
                     or tree.is_ancestor(child.identifier, target.node_id)),
                    None)
                if next_hop is not None:
                    source.add_forwarding(
                        target, self.topology.get_node(next_hop.identifier))
                continue
            # Destination is elsewhere: forward upwards, if we have a parent.
            uplink = tree.parent(source.node_id)
            if uplink is not None:
                source.add_forwarding(
                    target, self.topology.get_node(uplink.identifier))

    if not self.testing:
        self.topology.send_forwarding_tables(remove)
def get_path_to_santa(orbital_tree: Tree) -> list:
    """Return the node ids visited when moving from 'YOU' towards 'SAN'.

    The walk starts at the parent of 'YOU' and climbs towards the root,
    recording every node whose subtree does not contain 'SAN'. At the first
    ancestor whose subtree does contain 'SAN', the downward path from that
    ancestor to (but excluding) 'SAN' is appended and the walk ends.

    Args:
        orbital_tree: tree that contains both a 'YOU' and a 'SAN' node.

    Returns:
        List of node identifiers, excluding 'YOU' and 'SAN' themselves.
    """
    path_to_santa = []
    current_node = orbital_tree.parent('YOU')
    while True:
        # Build the subtree once per step; it is needed for both the
        # membership test and the path lookup.
        branch = orbital_tree.subtree(current_node.identifier)
        if branch.contains('SAN'):
            for path in branch.paths_to_leaves():
                if 'SAN' in path:
                    # Cut the path at 'SAN' and stop at the first match, so
                    # the result is correct even when 'SAN' has children of
                    # its own (several leaf paths would then contain it).
                    path_to_santa += path[:path.index('SAN')]
                    break
            return path_to_santa
        path_to_santa.append(current_node.identifier)
        current_node = orbital_tree.parent(current_node.identifier)
def compare_actual_folder_with_tree(self, root: path, tree: Tree):
    """Assert that the folder layout under ``root`` mirrors ``tree``.

    The tree's root identifier is joined onto ``root``; that path is printed
    and asserted to exist. The check then recurses into the subtree of every
    child, using the joined path as the new base.

    Args:
        root: directory that should contain the entry named by ``tree.root``.
        tree: expected layout; node identifiers are path components.
    """
    node_id = tree.root
    expected_path = root.joinpath(node_id)
    print(expected_path)
    self.assertTrue(
        expected_path.exists(),
        "The path {} should exist, but doesn't".format(expected_path))
    for child in tree.children(node_id):
        self.compare_actual_folder_with_tree(
            expected_path, tree.subtree(child.identifier))
def trim_excess_root(tree: Tree) -> Tree:
    """Strip leading single-child roots from ``tree``.

    While the current root has exactly one child, that child is detached
    (``update_node(..., parent=None, bpointer=None)``) and its subtree
    becomes the current tree. For example A -> B -> (C, D) becomes
    B -> (C, D).

    Args:
        tree: tree to trim; its nodes are updated in place along the way.

    Returns:
        The first (sub)tree whose root does not have exactly one child.
    """
    current = tree
    while True:
        branches = current.children(current.root)
        if len(branches) != 1:
            return current
        only_child_id = branches[0].identifier
        current.update_node(only_child_id, parent=None, bpointer=None)
        current = current.subtree(only_child_id)
def our_cost(G: nx.Graph, T: tl.Tree) -> float:
    """Sum, over the edges of G covered by T, the leaf levels below their LCA.

    An edge is considered only when both endpoints appear among the tags of
    T's leaves. For such an edge the lowest common ancestor is obtained from
    ``get_lca`` and the level (within the LCA's subtree) of every leaf of
    that subtree is added to the cost.

    Args:
        G: graph whose edges are scored.
        T: tree whose leaf tags are compared with the nodes of G.

    Returns:
        The accumulated cost (0 when no edge has both endpoints in T).
    """
    # A set makes the per-edge membership tests O(1) instead of scanning a
    # list of all leaf tags for every edge.
    leaf_tags = {leaf.tag for leaf in T.leaves()}
    cost = 0
    for edge in G.edges:
        # only look at edges in this tree.
        if edge[0] in leaf_tags and edge[1] in leaf_tags:
            lca = get_lca(T, edge[0], edge[1])
            subtree = T.subtree(lca)
            for leaf in subtree.leaves():
                cost += subtree.level(leaf.identifier)
    return cost
def get_descendents(account_id: str, account_tree: Tree) -> list:
    """
    Collect the tags of every node in the subtree rooted at ``account_id``,
    leaving out nodes whose tag equals ``account_id``.

    An empty list is returned when the tree raises ``NodeIDAbsentError``
    for the requested account.
    """
    try:
        members = account_tree.subtree(account_id).all_nodes()
    except tlexceptions.NodeIDAbsentError:
        return []
    return [member.tag for member in members if member.tag != account_id]
def collapse(t1: tl.Tree, t2: tl.Tree) -> tl.Tree:
    """Return a new tree made of t1 plus t2's top-level branches.

    Both inputs are deep-copied and passed through ``reset_ids`` first, so
    the arguments themselves are not modified. Every child subtree of the
    copied t2's root is then pasted under the root of the copied t1.
    """
    # work on deep copies, then reset all the identifiers
    left, right = (tl.Tree(tree=source, deep=True) for source in (t1, t2))
    left, right = reset_ids(left), reset_ids(right)

    # graft each top-level branch of the second tree onto the first root
    for branch in right.children(right.root):
        left.paste(left.root, right.subtree(branch.identifier))
    return left
def create_dummy_download_folder(root: path, tree: Tree) -> path:
    """Materialise ``tree`` as files and folders below ``root``.

    The tree's root identifier is joined onto ``root``. If that path does
    not exist yet it is created: as an empty file when the identifier ends
    with ".mp3", as a directory otherwise. The same is then done
    recursively for the subtree of every child.

    Returns:
        The path corresponding to the tree's root node.
    """
    node_id = tree.root
    target = root.joinpath(node_id)
    if not target.exists():
        print("Creating {}".format(target))
        create = target.touch if node_id.endswith(".mp3") else target.mkdir
        create()
        # sleep to ensure that the created folders don't have the same ctime
        time.sleep(0.01)
    for child in tree.children(node_id):
        create_dummy_download_folder(target, tree.subtree(child.identifier))
    return target
def crossOver(individualA, individualB):
    """Create a child individual by grafting a random subtree of B into A.

    Both parent trees are deep-copied and given fresh ids. A random node of
    the copy of A is chosen for removal and a random node of the copy of B
    supplies the replacement subtree. If the removed node is A's root the
    replacement subtree becomes the whole result; otherwise it is pasted
    under the removed node's parent. The procedure is repeated until the
    resulting tree is no deeper than ``TREE_MAX_DEPTH``.

    Returns:
        A new ``Individual`` wrapping the resulting tree.
    """
    tree = None
    # ``tree.depth()`` is the maximum level of the tree. The previous
    # ``tree.depth(root_node)`` returned the level of the root itself
    # (always 0), so the depth limit was never enforced.
    while tree is None or tree.depth() > TREE_MAX_DEPTH:
        treeA = Tree(tree=individualA.tree, deep=True)
        treeB = Tree(tree=individualB.tree, deep=True)
        regenerate_ids(treeA)
        regenerate_ids(treeB)
        removedNode = random.choice(treeA.all_nodes())
        addedNode = random.choice(treeB.all_nodes())
        addedSubtree = Tree(tree=treeB.subtree(addedNode.identifier),
                            deep=True)
        if treeA.root == removedNode.identifier:
            tree = addedSubtree
        else:
            parent = treeA.parent(removedNode.identifier)
            treeA.remove_subtree(removedNode.identifier)
            treeA.paste(parent.identifier, addedSubtree)
            tree = treeA
    return Individual(tree)
class Parser():
    """
    Parser for a NAL thesaurus XML file.

    Concepts are collected into a dictionary of node properties and, in
    parallel, into a tree (``self.tree``) that is used to propagate the
    labels configured in ``__root2label`` to all descendants of a few
    selected root concepts.
    """

    # All of the roots are coming from the TM queries.
    # Other items in NAL may be considered phenotypes or chemicals
    # but are not being labelled for our purposes.
    __root2label = {
        322: "Phenotype",
        156: "Phenotype",
        319: "Phenotype",
        7812: "Chemical",
        8: "Chemical",
        264: "Chemical",
        858: "Plant",
    }

    def __init__(self, namespace="usda_nal_thesaurus"):
        self.namespace = namespace
        self.name2id = {}
        self.tree = Tree()
        self.tree.create_node("Root", "root")

    def parse(self, xmlFile):
        """
        Parse the xml file and return a dict mapping node id to its
        properties (name, synonyms, parents, children, labels, edges).
        """
        nodes = {}
        thesaurus = ET.parse(xmlFile).getroot()  # THESAURUS node

        # First pass: one node per concept, all hanging off the tree root.
        for concept in thesaurus:
            iden = "NAL:" + concept.find("TNR").text  # node source ID
            props, name = self.parseNode(concept)
            nodes[iden] = props
            self.tree.create_node(tag=name, identifier=iden, parent='root')
            self.name2id[name] = iden  # for mapping edges

        # Second pass: parent/child relationships are expressed by name, not
        # id, and the file is sorted alphabetically, so edges can only be
        # resolved once every name is known.
        for nodeID, props in nodes.items():
            props["edges"] = self.parseEdges(nodeID, props)

        # Propagate the configured labels down from the selected roots.
        for root_number, label in self.__root2label.items():
            labelled = self.tree.subtree("NAL:{}".format(root_number))
            for tree_node in labelled.all_nodes():
                # duplicates created for extra parents carry a ".N" suffix
                base_id = tree_node.identifier.split(".")[0]
                nodes[base_id]["labels"].add(label)
        return nodes

    def parseNode(self, node):
        """
        Extract the properties of one concept element.

        Returns a ``(properties, name)`` tuple.
        """
        name = node.find("DESCRIPTOR").text
        synonyms = extractElem(node, "UF")  # Used for
        parents = extractElem(node, "BT")   # Broader term
        children = extractElem(node, "NT")  # Narrow term
        # Related terms ("RT") and subject categories ("SC") are not
        # extracted; only the namespace is attached as a label here.
        labels = {self.namespace}
        properties = {
            "name": name,
            "synonyms": list(synonyms),
            "parents": parents,
            "children": children,
            "labels": labels,
        }
        return properties, name

    def parseEdges(self, nodeID, props):
        """
        Build the edge list of a node and place it in the tree.

        The node is moved under its first parent; for every further parent
        a duplicate tree node with identifier ``<nodeID>.<n>`` is created.
        """
        edges = [("has_child", self.name2id[child_name])
                 for child_name in props["children"]]
        for parent_index, parent_name in enumerate(props["parents"]):
            parentID = self.name2id[parent_name]
            if parent_index == 0:
                self.tree.move_node(nodeID, parentID)
            else:
                self.tree.create_node(
                    tag=props["name"],
                    identifier=nodeID + ".{}".format(parent_index),
                    parent=parentID)
            edges.append(("is_a", parentID))
        return edges
class CoreCommand:
    """Base command.

    Besides running ``main``, a command tracks its progress in a tree of
    commands: every command owns a node (keyed by ``_progress_count``) whose
    data is a weak reference to the command, and merging a command into
    another pastes its progress tree below the other command's node.
    Top-level progress trees are registered in the class-level
    ``_progresses`` cache.
    """

    _events = {}
    _entries = {}
    _native_pool = None
    _progress_counter = itertools.count(1)
    _progresses = LRUCache(1000 * 1000)

    def __new__(cls, *args, **kwargs):
        obj = super(CoreCommand, cls).__new__(cls)
        obj._get_commands()
        return obj

    def __init__(self, priority=constants.Priority.Normal):
        self._created_time = arrow.now()
        self.command_id = None
        self._started_time = None
        self._finished_time = None
        self._priority = priority
        self._futures = []
        self._progress_max = None
        self._progress_current = None
        self._progress_text = None
        self._progress_count = None
        self._progress_type = None
        self._progress_tree = None
        self._progress_time = None
        self._progress_timestamp = 0
        self._progress_title = self.__class__.__name__

    def _run(self, *args, **kwargs):
        """
        Run the command with *args and **kwargs.
        """
        log.d("Running command:", self.__class__.__name__)
        r = self.main(*args, **kwargs)
        log.d("Finished running command:", self.__class__.__name__)
        return r

    @classmethod
    def get_all_progress(cls):
        """Return the progress dicts of all registered, still-alive commands.

        Entries are inserted at the front, so the result is in reverse
        iteration order of ``_progresses``.
        """
        ps = []
        for _, t in cls._progresses.items():
            # node data is a weakref to the command; it yields None once
            # the command has been garbage collected
            x = t.get_node(t.root).data()
            if x:
                ps.insert(0, x.get_progress())
        return ps

    def _add_progress(self, add=True):
        """Ensure this command has a progress id and tree; refresh its time.

        When ``add`` is true a newly created tree is also registered in
        ``_progresses``.
        """
        if not self._progress_count:
            self._progress_count = next(self._progress_counter)
        if self._progress_tree is None and self._progress_count not in self._progresses:
            self._progress_tree = Tree()
            if add:
                self._progresses[self._progress_count] = self._progress_tree
            self._progress_tree.create_node(self._progress_count,
                                            self._progress_count,
                                            data=weakref.ref(self))
        self._progress_timestamp = arrow.now().timestamp
        self._progress_time = arrow.now()

    def merge(self, cmd):
        """
        Merge this command into given command
        """
        assert cmd is None or isinstance(cmd, CoreCommand)
        if cmd:
            self.merge_progress_into(cmd)
        return self

    def merge_progress_into(self, cmd):
        """Attach this command's progress tree below ``cmd``'s progress node.

        Afterwards both commands share ``cmd``'s tree and this command is
        removed from the top-level ``_progresses`` registry.
        """
        assert isinstance(cmd, CoreCommand)
        cmd._add_progress()
        self._add_progress(False)
        cmd._progress_tree.paste(cmd._progress_count, self._progress_tree)
        self._progress_tree = cmd._progress_tree
        if self._progress_count in self._progresses:
            del self._progresses[self._progress_count]

    def _str_progress_tree(self):
        """Render the progress tree as text (used for debug logging)."""
        self._tree_reader = ""

        def w(l):
            # The print backend calls this once per rendered line, so the
            # lines have to be accumulated rather than overwritten.
            self._tree_reader += l.decode('utf-8') + '\n'

        try:
            self._progress_tree._Tree__print_backend(func=w)
        except tree_exceptions.NodeIDAbsentError:
            self._tree_reader = "Tree is empty"
        return self._tree_reader

    def get_progress(self):
        """Aggregate the progress of this command and everything merged in.

        Returns:
            A dict with title, subtitle, subtype, text, value, percent, max,
            type, state and timestamp; ``percent`` is -1.0 when no maximum
            is known. ``None`` when the command has no (non-empty) progress
            tree.
        """
        if self._progress_tree:
            log.d("Command", self,
                  "progress tree:\n{}".format(self._str_progress_tree()))
            p = {
                'title': self._progress_title,
                'subtitle': '',
                'subtype': None,
                'text': '',
                'value': .0,
                'percent': .0,
                'max': .0,
                'type': self._progress_type,
                'state': self.state.value if hasattr(self, "state") else None,
                'timestamp': self._progress_timestamp
            }

            t = self._progress_tree.subtree(self._progress_count)
            prog_time = self._progress_time
            prog_text = self._progress_text if self._progress_text else ''
            prog_subtitle = ''
            prog_subtype = None
            for _, n in t.nodes.items():
                cmd = n.data()
                if cmd:
                    if cmd._progress_max:
                        p['max'] += cmd._progress_max
                    if cmd._progress_current:
                        p['value'] += cmd._progress_current
                    # a sub-command updated after this command supplies the
                    # displayed text / subtitle / subtype
                    if not prog_time or (cmd._progress_time
                                         and cmd._progress_time > prog_time):
                        prog_text = cmd._progress_text
                        prog_subtitle = cmd._progress_title
                        prog_subtype = cmd._progress_type
            if p['max']:
                p['percent'] = (100 / p['max']) * p['value']
            else:
                p['percent'] = -1.0
            p['text'] = prog_text
            p['subtitle'] = prog_subtitle
            p['subtype'] = prog_subtype
            return p
        return None

    def set_progress(self, value=None, text=None, title=None, type_=None):
        """Update any of the current value, text, title and type."""
        assert value is None or isinstance(value, (int, float))
        assert text is None or isinstance(text, str)
        # validate ``title`` itself (the check used to test ``text``, which
        # rejected a title passed without text)
        assert title is None or isinstance(title, str)
        self._add_progress()
        if title is not None:
            self._progress_title = title
        if value is not None:
            self._progress_current = value
        if text is not None:
            self._progress_text = text
        if type_ is not None:
            self._progress_type = type_

    def set_max_progress(self, value, add=False):
        """Set the maximum progress, or increase it by ``value`` if ``add``."""
        assert isinstance(value, (int, float))
        self._add_progress()
        if add:
            if self._progress_max is None:
                self._progress_max = 0
            self._progress_max += value
        else:
            self._progress_max = value

    def next_progress(self, add=1, text=None, _from=0):
        """Advance the current progress by ``add`` (starting at ``_from``)."""
        assert isinstance(add, (int, float))
        if self._progress_current is None:
            self._progress_current = _from
        if text is not None:
            self._progress_text = text
        self._progress_current += add
        utils.switch(self._priority)

    @contextmanager
    def progress(self, max_progress=None, text=None):
        """Context manager: set the maximum on entry, mark it reached on exit.

        The final ``set_progress`` only runs when the body finishes without
        raising.
        """
        if max_progress is not None:
            self.set_max_progress(max_progress)
        yield
        if max_progress is not None:
            self.set_progress(max_progress, text)

    def run_native(self, f, *args, **kwargs):
        """Submit ``f`` to the native pool and return the wrapping future."""
        f = async_utils.AsyncFuture(
            self,
            self._native_pool.apply_async(_native_runner(f), args, kwargs))
        self._futures.append(f)
        return f

    def push(self, msg, scope=None):
        """Push ``msg`` through ``constants.notification`` if one is set."""
        if constants.notification:
            return constants.notification.push(msg, scope=scope)
        # TODO: raise error perhaps?

    def kill(self):
        """Kill every future started through ``run_native``."""
        for f in self._futures:
            f.kill()

    def _log_stats(self, d=None):
        """Log creation/run/log time deltas; ``d`` is the logging time."""
        create_delta = self._finished_time - self._created_time
        run_delta = self._finished_time - self._started_time
        log_delta = (d - self._finished_time) if d else None
        log.i(
            "Command - '{}' -".format(self.__class__.__name__),
            "ID({})".format(self.command_id) if self.command_id else '',
            "running time:\n",
            "\t\tCreation delta: {} (time between creation and finish)\n".
            format(create_delta),
            "\t\tRunning delta: {} (time between start and finish)\n".format(
                run_delta),
            "\t\tLog delta: {} (time between finish and this log)\n".format(
                log_delta),
        )

    def __del__(self):
        # hasattr guards: __del__ may run on a partially initialised object
        if hasattr(self, '_progress_count') and hasattr(self, '_progresses'):
            if self._progress_count and self._progress_count in self._progresses:
                del self._progresses[self._progress_count]

    @classmethod
    def _get_commands(cls, self=None):
        """Collect the CommandEvent / CommandEntry attributes of the class.

        Each found attribute is bound to the class and initialised; the
        resulting name->object maps are stored on ``_entries`` / ``_events``
        and returned as ``(entries, events)``.
        """
        if self is not None:
            cls = self
        events = {}
        entries = {}
        for a in cls.__dict__.values():
            if isinstance(a, CommandEvent):
                a.command_cls = cls
                events[a.name] = a
                a._init()

            if isinstance(a, CommandEntry):
                a.command_cls = cls
                entries[a.name] = a
                a._init()
        cls._entries = entries
        cls._events = events
        return entries, events
parentId = container["parentId"] if containername != "Tenant": if tree.contains(parentName): if tree.contains(containername) is False: tree.create_node(containername, containername, parent=parentName) else: getcontainerbyid = clntapi.get_container_by_id(parentId) parent_parentname = getcontainerbyid["parentName"] tree.create_node(parentName, parentName, parent=parent_parentname) tree.create_node(containername, containername, parent=parentName) if containername == targetcontainer: targetcontainerkey = container["key"] sub_t = tree.subtree(containertobemoved) sub_t.show() paths_to_leaves = sub_t.paths_to_leaves() movedcontainers = [] for paths in paths_to_leaves: if suffixremove == "n": parentname = paths[0] + "_temp" for container in paths: if container == containertobemoved: if container not in movedcontainers: addContainer = clntapi.add_container( container + "_temp", targetcontainer, targetcontainerkey) getcontainerbyname = clntapi.get_container_by_name(
def __init__(self):
    """Load the chapter manifest and expand ``$ref:`` nodes in book trees.

    Manifest entries whose header starts with "BOOK_" are stored as books,
    all others as chapters. One tree is created per chapter and per book.
    Every book-tree node whose tag (after an optional "prefix|") starts with
    "$ref:" is then replaced by a copy of the referenced chapter's first
    top-level subtree; the replaced node's children are re-created under
    the new root, and the replaced node's data (its insertion timestamp) is
    carried over to preserve ordering.
    """
    self.mfest = load_manifest("../chapters.yaml")
    self.books = {}
    self.chaps = {}
    for entry in self.mfest:
        for header, body in entry.items():
            bucket = self.books if header.startswith("BOOK_") else self.chaps
            bucket[header] = body

    # An earlier nested-dict approach (expand_def(book, self.chaps) per
    # book) did not work very well; the tree approach below replaced it.
    self.treechap = {
        title: create_tree({title: chap})
        for title, chap in self.chaps.items()
    }
    self.treebook = {
        title: create_tree({title: book})
        for title, book in self.books.items()
    }

    for title, tree in self.treebook.items():
        for node in tree.expand_tree(mode=Tree.DEPTH):
            realtag = node.tag if type(node) is Node else node
            if "|" in realtag:
                realtag = realtag.split("|")[1]
            if not realtag.startswith("$ref:"):
                continue

            chapkey = realtag.split("$ref:")[1]
            chapter_copy = Tree(tree=self.treechap[chapkey], deep=True)
            # move up its children to replace totally the root
            first_branch = chapter_copy.children(chapter_copy.root)[0]
            newtree = chapter_copy.subtree(first_branch.tag)

            # re-create the children of the referencing node below the
            # expanded chapter root
            for anode in tree.children(node):
                origtag = anode.tag
                if "|" in origtag:
                    origtag = anode.tag.split("|")[1]
                newtree.create_node(timestamp_node(origtag), origtag,
                                    parent=newtree.root, data=time())

            # find parent node of the node to be replaced
            parent = tree.parent(node)
            # use the old timestamp data to preserve insertion order
            newtree.get_node(newtree.root).data = tree.get_node(node).data
            # remove old node, then paste the expanded one in its place
            tree.remove_subtree(node)
            tree.paste(parent.identifier, newtree)
# Top-level script: downloads the ImageNet structure XML, parses it with
# ElementTree and builds `synsetTree`, a tree rooted at 'fall11' ("Entity")
# in which every synset node is keyed by its 'wnid', tagged with its 'words'
# and carries Confidence(0, 0) as data. A wnid that is already present in the
# tree is skipped. `treeDog` is the subtree rooted at 'n02087122', and `graph`
# is loaded with load_graph() from tf_files/retrained_graph.pb.
# NOTE(review): image_label() is only partially visible in this excerpt.
# NOTE(review): time.clock() was removed in Python 3.8 - confirm the target
# interpreter or switch to time.perf_counter().
with urllib.request.urlopen('http://www.image-net.org/api/xml/structure_released.xml') as response: html = response.read() tree = ElementTree(fromstring(html)) root = tree.getroot() synsetTree = Tree() synsetTree.create_node('Entity', 'fall11', data = Confidence(0, 0)) for synset in root.iter('synset'): for child in synset: if child.get('wnid') in synsetTree._nodes: continue synsetTree.create_node(child.get('words'), child.get('wnid'), parent = synset.get('wnid'), data = Confidence(0, 0)) # synsetTree.show() treeDog = synsetTree.subtree('n02087122') model_file = "tf_files/retrained_graph.pb" graph = load_graph(model_file) def image_label(file_ID, file_suffix, graph): if __name__ == "__main__": start = time.clock() file_name = "tf_files/ImageNet_test" + '/' + file_ID + '/' + file_suffix model_file = "tf_files/retrained_graph.pb" label_file = "tf_files/retrained_labels.txt" input_height = 224 input_width = 224 input_mean = 128 input_std = 128
# Demo of tree traversal, filtering, subtrees and pasting on the family tree.
# All output uses the call form print(x), which behaves the same for a single
# argument on Python 2 and Python 3 (the statement form `print x` is a
# SyntaxError on Python 3).
print("#"*4 + "All family members in DEPTH mode")
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)
print('\n')

print("#"*4 + "All family members without Diane sub-family")
# The filter receives Node objects, hence the comparison on .identifier.
tree.show(idhidden=False, filter=lambda x: x.identifier != 'diane')
# Equivalent traversal without show():
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print(tree[node].tag)
print('\n')

print("#"*4 + "Let me introduce Diane family only")
sub_t = tree.subtree('diane')
sub_t.show()
print('\n')

print("#"*4 + "Children of Diane")
print(tree.is_branch('diane'))
print('\n')

print("#"*4 + "OOhh~ new members enter Jill's family")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste('jill', new_tree)
# Leftover draft of a recursive dictionary walk, kept for reference:
# for key, value in dictionary.items():
#     if type(value) is dict:
#         if root:
#             t = 5
#             root_time = t
#         else:
#             print('t', root_time)
#             print(key, value)
#         recursive_items(value, False, root_time)
#     else:
#         print(key, value)
#
# a = {'a': {1: {1: 2, 3: 4}, 2: {5: 6}}}
#
# recursive_items(a, True, 0)

from treelib import Node, Tree

# Build a small family tree, show it, show the subtree of one member and
# print how many children the root has.
tree = Tree()
tree.create_node("Harry", "harry")  # root node
for member_tag, member_id, parent_id in (
        ("Jane", "jane", "harry"),
        ("Bill", "bill", "harry"),
        ("Diane", "diane", "jane"),
        ("Mary", "mary", "diane"),
        ("Mark", "mark", "jane"),
):
    tree.create_node(member_tag, member_id, parent=parent_id)
tree.show()

sub_tree = tree.subtree("mark")
sub_tree.show()

root_children = tree.children(tree.root)
print(len(root_children))
# Demo of showing, traversing, filtering, extracting and pasting on the
# family tree. All output uses the call form print(x), which behaves the
# same for a single argument on Python 2 and Python 3.
print("#"*4 + "Breakdown of out family")
tree.show()
print('\n')

print("#"*4 + "All family members in DEPTH mode")
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)
print('\n')

print("#"*4 + "All family members without Diane sub-family")
# The filter is applied to Node objects; comparing the node itself with a
# string is always "not equal", so the identifier has to be compared.
for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane',
                             mode=Tree.DEPTH):
    print(tree[node].tag)
print('\n')

print("#"*4 + "Let me introduce Diane family only")
sub_t = tree.subtree('diane')
sub_t.show()
print('\n')

print("#"*4 + "Children of Diane")
print(tree.is_branch('diane'))
print('\n')

print("#"*4 + "OOhh~ new members enter Jill's family")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste('jill', new_tree)
tree.show()
print('\n')
class Group(ElementWithAttributes):
    """Container element whose children are addressed by "/"-separated keys.

    Children (groups, data sets, ...) are stored as node data in
    ``self.tree``, keyed by their full key. When the group is linked to a
    directory (``self.path``), assigning an element with attributes also
    writes its directory, type file and attributes file to disk.
    """

    def __init__(self):
        super(Group, self).__init__()
        self.type = DATA_DIR_TYPES.GROUP
        self.path = None
        self.tree = Tree()

    def __getitem__(self, item):
        """Return the element stored under ``item``, or an attribute value.

        If ``item`` is not a node key, its last "/" component is looked up
        in the attributes of the parent element (the tree root when there
        is no "/").

        Raises:
            KeyError: ``item`` is neither a node key nor an attribute.
            GroupError: a data set has not been loaded and the group is not
                linked to a path.
        """
        if item not in self.tree:
            rsplit = item.rsplit("/", maxsplit=1)
            if len(rsplit) == 1:
                item_0 = self.tree.root
                key = rsplit[0]
            else:
                item_0, key = rsplit
            if item_0 in self.tree:
                node = self.tree[item_0]
                if (isinstance(node.data, ElementWithAttributes)
                        and key in node.data.attrs):
                    return node.data.attrs[key]  # attribute value
            raise KeyError(f"{item} is not a valid key")

        node = self.tree[item]
        if isinstance(node.data, Group):
            # rebuild tree with reduced identifiers (prefix ``item`` removed)
            stree = self.tree.subtree(item)
            for n in stree.all_nodes_itr():
                predecessor = n.predecessor(stree.identifier)
                if predecessor is None:
                    parent = None
                else:
                    parent = predecessor.split(item, maxsplit=1)[1]
                node.data.tree.create_node(
                    n.tag,
                    n.identifier.split(item, maxsplit=1)[1],
                    parent,
                    data=n.data)
        elif isinstance(node.data, DataSet):
            # lazily load the data frame from disk on first access
            if node.data.df.empty:
                if self.path is None:
                    raise GroupError(
                        f"{item} is not loaded yet and this element is not linked to a File or Group"
                    )
                node.data.df = pd.read_parquet(self.path / item / DATA_FILE)
        return node.data

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` and persist it if the group is linked.

        Raises:
            KeyError: ``key`` already exists or its parent key does not.
            ValueError: ``value`` is not a supported element type.
        """
        if key in self.tree:
            raise KeyError(f"{key} already exists")

        rsplit = key.rsplit("/", maxsplit=1)
        if len(rsplit) == 1:
            item_0 = self.tree.root
            key_1 = rsplit[0]
        else:
            item_0, key_1 = rsplit
        if item_0 is not None and item_0 not in self.tree:
            raise KeyError(f"Parent key {item_0} does not exist")

        dd_type = None
        if isinstance(value, Group):
            dd_type = value.type
            # re-key the group's own tree with ``key`` as identifier prefix
            new_tree = Tree()
            for node in value.tree.all_nodes_itr():
                if node.parent is None:
                    parent = None
                else:
                    parent = key + "/" + node.parent
                new_tree.create_node(node.tag,
                                     key + "/" + node.identifier,
                                     parent=parent,
                                     data=node.data)
            value.tree = new_tree
            self.tree.create_node(tag=key_1,
                                  identifier=key,
                                  parent=item_0,
                                  data=value)
            self.tree.paste(key, new_tree)
        elif isinstance(value, DataSet):
            dd_type = DATA_DIR_TYPES.DATASET
            self.tree.create_node(tag=key_1,
                                  identifier=key,
                                  parent=item_0,
                                  data=value)
            if self.path is not None:
                # NOTE(review): this writes into self.path / key before that
                # directory is created further down - confirm the intended
                # order.
                value.df.to_parquet(self.path / key / DATA_FILE)
        elif isinstance(value, Raw):
            pass
        elif isinstance(value, Attribute):
            pass
        else:
            raise ValueError(f"{value} is not a valid type for DataDir")

        # write ddir and attributes file if self is linked
        if isinstance(value, ElementWithAttributes) and self.path is not None:
            (self.path / key).mkdir()
            _write_ddir_json(self.path / key, dd_type=dd_type)
            # close the attributes file deterministically instead of leaving
            # the handle to the garbage collector
            with (self.path / key / ATTRIBUTES_FILE).open("w") as attrs_file:
                json.dump(value.attrs, attrs_file, indent=4)

    def link(self, path):
        """Link this group to the directory ``path``."""
        self.path = path
# TreeT wraps a `Tree` instance (self.tree) and converts between three
# representations: a tokenized bracketed ("PTB") line, the tree itself, and
# per-leaf tag sequences.
#
# - __init__: creates an empty Tree; the max_id argument is accepted but not
#   used.
# - from_ptb_to_tree(line, max_id, leaf_id, parent_id): recursive. Reads
#   line[1] as the node tag; the node created without a parent gets id 0,
#   other internal nodes take their id from max_id, and leaf words take ids
#   counted up from leaf_id (default 1). Returns
#   (number of tokens consumed, max_id, leaf_id).
# - add_height(tree_dep): resets data.leaves on every node, then fills
#   data.height and data.leaves per leaf based on tree_dep[leaf id]; nodes
#   left without leaves are filled from their lowest-height child. Returns
#   True. NOTE(review): tree_dep presumably maps a leaf id to the id of its
#   head - confirm against the caller.
# - _from_tree_to_ptb / from_tree_to_ptb: serialise the tree back to a
#   bracketed string, visiting children sorted by identifier; a leaf emits
#   its tag followed by data.word.
# - from_tag_to_tree(tag, word, pos_id): creates nodes from the entries of
#   `tag`, storing a leading CL/CR marker in data.comb_side and the first
#   character of each remaining entry in data.miss_side; assigns `word` to
#   the first leaf whose miss_side is ''. Returns the next unused pos_id.
# - is_combine_* / is_complete_tree / get_missing_leaves_* / root_tag /
#   is_no_missing_leaves: queries over node data, each wrapped in @memoize.
# - combine_tree(_tree, comb_leaf): pastes _tree.tree at comb_leaf, then
#   calls link_past_node(comb_leaf) and returns self.
#   NOTE(review): this mutating method is also wrapped in @memoize - confirm
#   that the memoize helper does not skip repeated calls.
# - tree_to_path(nid, path): recursive; fills `path` with one list per leaf
#   id and returns (leaf id, remaining height).
# - path_to_tags / path_to_words / from_tree_to_tag / from_ptb_to_tag: turn
#   those paths into tag lists and word lists.
class TreeT(object): def __init__(self, max_id=0): self.tree = Tree() def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None): # starts by ['(', 'pos'] pos_tag = line[1] if parent_id is None: pos_id = 0 else: pos_id = max_id max_id += 1 self.tree.create_node(pos_tag, pos_id, parent_id, TreeData()) parent_id = pos_id total_offset = 2 if line[2] != '(': # sub-tree is leaf # line[0:3] = ['(', 'pos', 'word', ')'] word_tag = line[2] self.tree.create_node(word_tag, leaf_id, parent_id, TreeData()) return 4, max_id, leaf_id + 1 line = line[2:] while line[0] != ')': offset, max_id, leaf_id = self.from_ptb_to_tree( line, max_id, leaf_id, parent_id) total_offset += offset line = line[offset:] return total_offset + 1, max_id, leaf_id def add_height(self, tree_dep): for n in self.tree.all_nodes(): n.data.leaves = [] for leaf in self.tree.leaves(): lid = leaf.identifier hid = tree_dep[lid] if hid == self.tree.root: self.tree[lid].data.height = self.tree.depth(self.tree[lid]) for cid in [ p for p in self.tree.paths_to_leaves() if lid in p ][0]: self.tree[cid].data.leaves += [lid] else: height = -1 cid = lid cond = True while cond: self.tree[cid].data.leaves += [lid] height += 1 cid = self.tree.parent(cid).identifier cid_leaves = [l.identifier for l in self.tree.leaves(cid)] cid_l_dep = [tree_dep[l] for l in cid_leaves if l != lid] cond = set(cid_l_dep).issubset(set(cid_leaves)) self.tree[lid].data.height = height x_nodes = [ n.identifier for n in self.tree.all_nodes() if n.data.leaves == [] ] for x_node in x_nodes[::-1]: min_id = min(self.tree.children(x_node), key=lambda c: c.data.height) _lid = min_id.data.leaves[0] self.tree[_lid].data.height += 1 self.tree[x_node].data.leaves += [_lid] return True def _from_tree_to_ptb(self, nid): nid = self.tree.subtree(nid).root if self.tree[nid].is_leaf(): return ' (' + self.tree[nid].tag + ' ' + self.tree[ nid].data.word + ')' res = ' (' + self.tree[nid].tag for c_nid in sorted(self.tree.children(nid), key=lambda x: 
x.identifier): res += self._from_tree_to_ptb(c_nid.identifier) return res + ')' def from_tree_to_ptb(self): return self._from_tree_to_ptb(self.tree.root) def from_tag_to_tree(self, tag, word, pos_id=0): parent_id = None for tag_nodes in tag: if tag_nodes[0] in [CL, CR]: c_side = tag_nodes[0] _tag_nodes = tag_nodes[1:] if len(tag_nodes) > 1 else [''] else: c_side = '' _tag_nodes = tag_nodes self.tree.create_node(_tag_nodes[0], pos_id, parent=parent_id, data=TreeData(comb_side=c_side)) parent_id = pos_id pos_id += 1 for tag_node in _tag_nodes[1:]: self.tree.create_node(tag_node[1:], pos_id, parent=parent_id, data=TreeData(miss_side=tag_node[0])) pos_id += 1 for l in self.tree.leaves(): if l.data.miss_side == '': l.data.word = word break return pos_id @memoize def is_combine_to(self, side): return self.tree[self.tree.root].data.comb_side == side @memoize def is_combine_right(self): return self.is_combine_to(CR) @memoize def is_combine_left(self): return self.is_combine_to(CL) @memoize def is_complete_tree(self): return all([n.data.miss_side == '' for n in self.tree.all_nodes()]) @memoize def get_missing_leaves_to(self, miss_val, side): return [ l.identifier for l in self.tree.leaves(self.tree.root) if l.data.miss_side == side and l.tag == miss_val ] @memoize def get_missing_leaves_left(self, miss_val): return self.get_missing_leaves_to(miss_val, L) @memoize def get_missing_leaves_right(self, miss_val): return self.get_missing_leaves_to(miss_val, R) @memoize def root_tag(self): return self.tree[self.tree.root].tag @memoize def is_no_missing_leaves(self): return all( [l.data.miss_side == '' for l in self.tree.leaves(self.tree.root)]) @memoize def combine_tree(self, _tree, comb_leaf): self.tree.paste(comb_leaf, _tree.tree) self.tree.link_past_node(comb_leaf) return self def tree_to_path(self, nid, path): # Stop condition if self.tree[nid].is_leaf(): path[nid] = [] return nid, self.tree[nid].data.height # Recursion flag = CR for child in self.tree.children(nid): cid = 
child.identifier leaf_id, height = self.tree_to_path(cid, path) if (height == 0): # Reached end of path can add flag path[leaf_id].insert(0, flag) # path[leaf_id].append(flag) if height > 0: path[leaf_id].insert(0, nid) # only single child will have height>0 # and its value will be the one that is returned # to the parent ret_leaf_id, ret_height = leaf_id, height - 1 # once we reached a height>0, it means that # this path includes the parent, and thus flag # direction should flip flag = CL return ret_leaf_id, ret_height def path_to_tags(self, path): tags = [] for p in path: _res = [] _p = copy.copy(p) if _p[0] in [CL, CR]: _res.append(_p[0]) _p = _p[1:] while _p[:-1]: el_p = _p.pop(0) _res.append(self.tree[el_p].tag) for c in self.tree.children(el_p): if c.identifier != _p[0]: _res.append(R + c.tag if c.identifier > _p[0] else L + c.tag) _res.append(self.tree[_p[0]].tag) tags.append(_res) return tags def path_to_words(self, path): return [self.tree[k].tag for k in path] def from_tree_to_tag(self): path = {} self.tree_to_path(self.tree.root, path) return { 'tags': self.path_to_tags(path.values()), 'words': self.path_to_words(path.keys()) } def from_ptb_to_tag(self, line, max_id, depend): self.from_ptb_to_tree(line, max_id) self.add_height(depend) path = {} self.tree_to_path(self.tree.root, path) return self.path_to_tags(path.values())
class model:
    """Enrolment model stored as a tree.

    The tree built by :meth:`generate_model` has the layout
    ``model -> iteration -> config -> country -> site -> patient``; node
    identifiers are the '/'-joined path of keys below the iteration node.
    Node payloads are instances of the ``config``, ``country``, ``site`` and
    ``patient`` classes defined elsewhere in this file.
    """

    # Backlog carried over from the original notes:
    # future
    #   - 3+3, arms, seasonality
    #   - 3+3: array of [#patients, stop period]; when #patients is reached
    #     apply the stop; trigger the stop timer in config/iteration?
    # config groups
    #   - could consider a config group, to combine configs to give
    #     max_patients, i.e. 1 group per arm
    # config
    #   - consider adding treatment period, screening period to config for
    #     cohorts
    # country
    #   - max_patients per country
    # patients
    #   - actual patients are projected to enrol and complete?
    #   - PFS, OS
    # output
    #   - table with iteration, config, country, site, patient, screened,
    #     enrolled, complete
    # option to use beta-pert?
    # multithreading for iterations
    # add option to __add__ models to combine trees?

    def __init__(self,
                 config_objs,
                 num_iterations=1,
                 screening_period=0,
                 treatment_period=0):
        """Store the model settings and create an empty tree.

        :param config_objs: iterable of config objects; each must expose
            ``setup_dict``, ``max_patients``, ``current_timestep`` and
            ``patients_enrolled``.
        :param num_iterations: number of iteration sub-trees to generate.
        :param screening_period: passed through to every ``patient``.
        :param treatment_period: passed through to every ``patient``.
        """
        self.config_objs = config_objs
        self.num_iterations = num_iterations
        self.screening_period = screening_period
        self.treatment_period = treatment_period
        self.tree = Tree()

    def generate_model(self):
        """Populate ``self.tree`` from ``self.config_objs``.

        Expected ``setup_dict`` layout (as read by this method)::

            setup_dict[config_key][country_key] -> [country_info, country_dict]
            country_dict[site_key]              -> [site_info, site_dict]
            site_dict[patient_key]              -> (screen_dt, enrol_dt, complete_dt)

        Site and patient keys starting with ``'__'`` are skipped. After the
        explicitly listed sites, ``num_sites - generated_sites`` extra sites
        are generated per country from the country's triangular
        distributions.
        """
        # root node
        root_id = 'model'
        self.tree.create_node(root_id, root_id)

        # iterations
        for iteration_id in [str(i) for i in range(self.num_iterations)]:
            self.tree.create_node(iteration_id,
                                  iteration_id,
                                  root_id,
                                  data=None)

            # configs
            # Fixed: the original iterated the bare name `config_objs`, which
            # only resolves if a module-level global of that name exists.
            for config_obj in self.config_objs:
                for config_key, config_dict in config_obj.setup_dict.items():
                    config_id = '/'.join([iteration_id, config_key])
                    self.tree.create_node(
                        config_key,
                        config_id,
                        parent=iteration_id,
                        data=config(config_obj.setup_dict,
                                    config_obj.max_patients,
                                    config_obj.current_timestep))

                    # countries
                    for country_key, country_entry in config_dict.items():
                        country_info = country_entry[0]
                        country_dict = country_entry[1]
                        country_id = '/'.join([config_id, country_key])
                        (num_sites, screen_rate_low, screen_rate_med,
                         screen_rate_high, setup_time_low, setup_time_med,
                         setup_time_high, screen_fail_rate,
                         drop_out_rate) = country_info
                        self.tree.create_node(
                            country_key,
                            country_id,
                            parent=config_id,
                            data=country(num_sites, screen_rate_low,
                                         screen_rate_med, screen_rate_high,
                                         setup_time_low, setup_time_med,
                                         setup_time_high, screen_fail_rate,
                                         drop_out_rate))
                        generated_sites = 0

                        # sites
                        for site_key, site_entry in country_dict.items():
                            site_info = site_entry[0]
                            site_dict = site_entry[1]
                            site_id = '/'.join([country_id, site_key])
                            screen_rate, setup_time = site_info
                            if site_key.startswith('__'):
                                continue
                            self.tree.create_node(site_key,
                                                  site_id,
                                                  parent=country_id,
                                                  data=site(
                                                      setup_time,
                                                      screen_rate))
                            generated_sites += 1

                            # patients
                            for patient_key, patient_dict in site_dict.items():
                                patient_id = '/'.join([site_id, patient_key])
                                if patient_key.startswith('__'):
                                    continue
                                screen_dt, enrol_dt, complete_dt = patient_dict
                                self.tree.create_node(
                                    patient_key,
                                    patient_id,
                                    parent=site_id,
                                    data=patient(self.screening_period,
                                                 self.treatment_period,
                                                 screen_fail_rate,
                                                 drop_out_rate, screen_dt,
                                                 enrol_dt, complete_dt))
                                if enrol_dt is not None:
                                    # NOTE(review): this counts on the shared
                                    # config_obj (not the node payload), so it
                                    # accumulates across iterations - confirm
                                    # that is intended.
                                    config_obj.patients_enrolled += 1

                        # other sites
                        country_node = self.tree.get_node(country_id)
                        for s in range(num_sites - generated_sites):
                            site_id = '/'.join([country_id, str(s)])
                            sr = country_node.data.triangular_screen_rate()
                            st = country_node.data.triangular_setup_time()
                            # A generated site cannot be set up before the
                            # config's current timestep.
                            st = max(config_obj.current_timestep, st)
                            self.tree.create_node(str(s),
                                                  site_id,
                                                  parent=country_id,
                                                  data=site(st, sr))

    def show_model(self, iteration=0):
        """Print the whole tree (``iteration == -1``) or one iteration's sub-tree.

        The selected sub-tree is also kept on ``self.sub_tree``.
        """
        if iteration == -1:
            self.tree.show()
        else:
            # TODO: raise a clear error if iteration >= num_iterations
            self.sub_tree = self.tree.subtree(str(iteration))
            self.sub_tree.show()

    @staticmethod
    def simulate(model_obj, start_dt, max_timestep=1000):
        """Advance ``model_obj`` one timestep at a time, screening new patients.

        :param model_obj: a generated ``model`` instance; its tree is mutated.
        :param start_dt: start date as a ``'%d-%m-%Y'`` string.
        :param max_timestep: number of timesteps to simulate.

        For every active config, each set-up site accumulates ``screen_rate``
        into ``screen_patient_buffer``; whole units in the buffer become new
        patient nodes. When a config reaches ``max_patients`` it is flagged
        ``enrolment_complete`` and ``max_patient_dt`` is recorded.
        """
        start_dt = datetime.strptime(start_dt, '%d-%m-%Y')
        tree = model_obj.tree
        for timestep in range(max_timestep):
            for iteration_node in tree.children('model'):
                for config_node in tree.children(iteration_node.identifier):
                    cfg = config_node.data
                    if cfg.current_timestep > timestep:
                        continue
                    # already reached max patients
                    if cfg.enrolment_complete:
                        continue
                    for country_node in tree.children(config_node.identifier):
                        for site_node in tree.children(
                                country_node.identifier):
                            site_data = site_node.data
                            if site_data.setup_time > timestep:
                                continue
                            site_data.screen_patient_buffer += site_data.screen_rate
                            if site_data.screen_patient_buffer >= 1:
                                for _ in range(
                                        int(site_data.screen_patient_buffer)):
                                    # NOTE(review): the id is derived from
                                    # patients_enrolled, which does not change
                                    # when a patient fails screening - a second
                                    # patient at the same site would then reuse
                                    # the id. Confirm against patient.from_timestep.
                                    patient_id = '/'.join([
                                        site_node.identifier,
                                        str(cfg.patients_enrolled)
                                    ])
                                    tree.create_node(
                                        str(cfg.patients_enrolled),
                                        patient_id,
                                        parent=site_node.identifier,
                                        data=patient.from_timestep(
                                            timestep, start_dt,
                                            model_obj.screening_period,
                                            model_obj.treatment_period,
                                            country_node.data.screen_fail_rate,
                                            country_node.data.drop_out_rate))
                                    patient_node = tree.get_node(patient_id)
                                    if patient_node.data.enrolled_dt is not None:
                                        cfg.patients_enrolled += 1
                                        if cfg.patients_enrolled >= cfg.max_patients:
                                            cfg.enrolment_complete = True
                                            cfg.max_patient_dt = date.strftime(
                                                start_dt +
                                                timedelta(days=timestep),
                                                '%d-%m-%Y')
                                            # stops this site only; other sites
                                            # in the same timestep still run
                                            break
                                # keep only the fractional remainder
                                site_data.screen_patient_buffer = site_data.screen_patient_buffer % 1
def _singular_or_same(word):
    """Return the singular form of *word*, or *word* itself when it has none."""
    singular = p.singular_noun(word)
    return word if singular is False else singular


def _label_related_pairs(data, output, word2syn, related, forward_mark,
                         backward_mark):
    """Label word pairs whose synsets are linked through *related*.

    For each pair ``d`` in *data*, every synset combination ``(i, j)`` is
    checked: ``j in related[i]`` emits ``d[0]\\t<forward_mark>\\td[1]`` and
    ``i in related[j]`` emits the same line with *backward_mark*. New lines
    are appended to *output* in place.

    :return: the pairs that received no label, in input order, de-duplicated.
    """
    labelled = set()
    unchanged = []
    for d in data:
        d0 = _singular_or_same(d[0])
        d1 = _singular_or_same(d[1])
        for i in word2syn[d0]:
            for j in word2syn[d1]:
                if j in related[i]:
                    mark = forward_mark
                elif i in related[j]:
                    mark = backward_mark
                else:
                    continue
                line = d[0] + "\t" + mark + "\t" + d[1]
                if line not in output:
                    output.append(line)
                labelled.add(d)
        if d not in unchanged and d not in labelled:
            unchanged.append(d)
    return unchanged


def use_hyp(word2syn, output, data):
    """Label word pairs using the WordNet hypernym file ``wn_hyp.pl``.

    A tree rooted at synset ``100001740`` is built from the
    ``(child, parent)`` id pairs in ``datapath + "wn_hyp.pl"``. Two passes
    then run over *data*:

    1. entailment: a pair is written with ``>`` when a synset of the second
       word lies in the subtree of a synset of the first, and with ``<`` for
       the reverse;
    2. alternation: remaining pairs are written with ``|`` when their synsets
       are siblings in the tree.

    :param word2syn: mapping from (singular) word to its synset ids.
    :param output: list of result lines; extended in place and returned.
    :param data: iterable of hashable ``(word0, word1)`` pairs.
    :return: ``(output, un_change)`` where ``un_change`` holds the pairs that
        neither pass could label.
    """
    dic = Tree()
    dic.create_node("100001740", "100001740")

    # Parse the file once and close it; the original reopened it on every
    # pass and never closed the handle.
    pending = []
    with open(datapath + "wn_hyp.pl", "r") as f:
        for line in f:
            child_id, parent_id = re.findall(r'\d+', line)
            pending.append((child_id, parent_id))

    # A node can only be attached once its parent exists, so keep retrying
    # the failed pairs until a pass adds nothing.
    add = -1
    while add != 0:
        add = 0
        still_pending = []
        for child_id, parent_id in pending:
            try:
                dic.create_node(child_id, child_id, parent=parent_id)
            except Exception:
                # best effort: parent not inserted yet, or child already present
                still_pending.append((child_id, parent_id))
            else:
                add += 1
        pending = still_pending
        print(dic.size())

    # entail[x] = every strict descendant of x
    entail = defaultdict(set)
    for n in dic.all_nodes():
        for m in dic.subtree(n.tag).all_nodes():
            if m.tag != n.tag:
                entail[n.tag].add(m.tag)

    un_change = _label_related_pairs(data, output, word2syn, entail, ">", "<")
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output.append("\n")
    del entail  # can be large; release before building the next table
    data = un_change

    # alter[x] = siblings of x
    alter = defaultdict(set)
    for n in dic.all_nodes():
        for m in dic.siblings(n.tag):
            if m.tag != n.tag and n.bpointer != m.tag:
                alter[n.tag].add(m.tag)

    un_change = _label_related_pairs(data, output, word2syn, alter, "|", "|")
    del alter
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output.append("\n")
    return output, un_change
class FTPClient(QtWidgets.QLabel):
    """FTP connection helper that mirrors the remote listing into a treelib Tree.

    Emits ``_signal`` with ``"OK"`` once :meth:`startConnect` has connected
    and built the first level of the file tree.
    """
    _signal = pyqtSignal(str)

    def __del__(self):
        """Close the FTP connection on teardown, if one was ever opened."""
        print("connect close")
        # self.ftp only exists after ftp_connect(); guard so that an instance
        # that never connected does not raise AttributeError here.
        ftp = getattr(self, 'ftp', None)
        if ftp is not None:
            ftp.close()

    def __init__(self, host: str, username: str, password: str, port='21'):
        """Store the connection settings; no network access happens here.

        :param host: server host
        :param username: login user name
        :param password: login password
        :param port: server port (converted with ``int``)
        """
        print("init")
        super(FTPClient, self).__init__()
        self.host = host
        self.port = int(port)
        self.username = username
        self.password = password

    def startConnect(self):
        """Connect to the server and build the root level of the file tree.

        :return: None
        """
        self.nowDirName = 'root'
        # create the file tree and its root node
        self.tree = Tree()
        itemProject = QStandardItem('root')
        itemProject.setIcon(self.getIcon())
        self.tree.create_node('root', 'root', parent=None, data=itemProject)
        # connect, then create the children of root from the remote listing
        self.ftp_connect()
        self.createTree(self.ftp.nlst(), 'root')
        self.tree.show()
        self._signal.emit("OK")  # notify listeners

    def restartTree(self):
        """Drop the current tree and rebuild it from the remote root directory."""
        print("刷新树")
        self.tree.remove_subtree('root')
        itemProject = QStandardItem('root')
        itemProject.setIcon(self.getIcon())
        self.tree.create_node('root', 'root', parent=None, data=itemProject)
        self.ftp.cwd('/')
        self.createTree(self.ftp.nlst(), 'root')

    def createTree(self, chiledList: list, parent: str) -> bool:
        """Attach the entries of *chiledList* as children of node *parent*.

        Nothing is added when *parent* already has children.

        :param chiledList: names of all entries one level below *parent*
        :param parent: identifier of the parent node
        :return: True if children were created, False otherwise (compares
            equal to the 1 / 0 previously returned)
        """
        if self.tree.subtree(parent).depth() == 0:
            # depth 0 means this node has not been expanded yet
            print("叶节点,开始创建文件子树")
        else:
            print("不是叶节点")
            return False
        # Node ids are "<parent>/<name>" so that equal file names in
        # different folders stay unique.
        for i in chiledList:
            node_id = parent + '/' + i
            itemProject = QStandardItem(node_id)
            if '.' not in node_id:
                # no dot in the path: treated as a folder
                itemProject.setIcon(self.getIcon())
            else:
                itemProject.setIcon(
                    self.getIcon('.' + node_id.split('.')[-1]))
            self.tree.create_node(node_id,
                                  node_id,
                                  parent=parent,
                                  data=itemProject)
        return True

    def ftp_connect(self):
        """Open the FTP connection and log in.

        :return: None
        """
        self.ftp = FTP()
        # connect to the host
        self.ftp.connect(self.host, self.port)
        # log in
        self.ftp.login(self.username, self.password)
        self.ftp.encoding = 'utf-8'
        print("log in success")

    def getIcon(self, extension='file'):
        """Return the operating system's default icon for *extension*.

        :param extension: file extension including the dot; the default
            ``'file'`` returns the icon of ``C:\\Users`` instead
        :return: the icon
        """
        provider = QFileIconProvider()
        # NOTE(review): the temporary file object is created but never opened
        # here - confirm whether it is needed for the icon lookup.
        tmpFile = QTemporaryFile('./_aa' + extension)
        tmpFile.setAutoRemove(False)
        icon = provider.icon(QFileInfo('./_aa' + extension))
        if extension == 'file':
            fileInfo = QFileInfo("C:\\Users")
            fileIcon = QFileIconProvider()
            icon = QIcon(fileIcon.icon(fileInfo))
        return icon

    def download_file(self, remotepath: str, localpath: str):
        """Download *remotepath* from the server to *localpath*.

        Paths without a dot are treated as folders and handed to
        :meth:`download_dir`.

        :param remotepath: remote path
        :param localpath: local path
        :return: None
        """
        remotepath = remotepath.replace('//', '/')
        localpath = localpath.replace('//', '/')
        # NOTE(review): os.path.isdir checks the local file system, not the
        # server - kept as in the original.
        if os.path.isdir(remotepath) or len(remotepath.split('.')) == 1:
            # it is a folder
            self.download_dir(remotepath, localpath)
            return
        print("是文件")
        bufsize = 1024
        # context manager so the handle is closed even if the transfer fails
        with open(localpath, 'wb') as fp:
            self.ftp.retrbinary('RETR ' + remotepath, fp.write, bufsize)
            self.ftp.set_debuglevel(0)
        print("下载远程文件:", remotepath, "\t到本地路径:", localpath, "成功")

    def download_dir(self, remotedir: str, localdir: str):
        """Download the remote folder *remotedir* into *localdir*.

        Example: ``download_dir('/test', 'G:/ftpdata/test10')`` or
        ``download_dir('test', 'G:/ftpdata/test10')``; the latter also creates
        a local ``test`` folder, while creating ``/test`` fails and is skipped.

        :param remotedir: remote folder
        :param localdir: local folder
        :return: None
        """
        try:
            # the folder name was appended to the local path by the caller,
            # so it has to be created first
            os.makedirs(localdir)
        except OSError:
            print("本地文件已经存在,不进行新建")
        print("开始下载文件夹:从 ", remotedir, " 到 ", localdir)
        os.chdir(localdir)
        self.walk(remotedir, localdir)
        print("文件夹下载结束")

    def get_dirs_files(self):
        """List the current remote directory.

        :return: ``(files, dirs)`` - names of the files and of the folders
        """
        dir_res = []
        self.ftp.dir('.', dir_res.append)
        files = [f.split(None, 8)[-1] for f in dir_res if f.startswith('-')]
        dirs = [f.split(None, 8)[-1] for f in dir_res if f.startswith('d')]
        return (files, dirs)

    def walk(self, remotedir, localdir):
        """Recursively download every file below *remotedir* into *localdir*.

        Changes both the remote and the local working directory while running.

        :param remotedir: remote folder
        :param localdir: local folder
        :return: None
        """
        print('Walking to', remotedir, os.getcwd())
        self.ftp.cwd(remotedir)
        try:
            os.mkdir(remotedir)
        except OSError:
            print("创建文件夹失败,文件夹可能已经存在")
        os.chdir(localdir)
        print("now dir", os.getcwd())
        ftp_curr_dir = self.ftp.pwd()
        print("local dir", localdir)
        files, dirs = self.get_dirs_files()
        print("FILES: ", files)
        print("DIRS: ", dirs)
        for f in files:
            print(remotedir, ':', f)
            with open(f, 'wb') as outf:
                self.ftp.retrbinary('RETR %s' % f, outf.write)
        for d in dirs:
            print("Dir:", d, ftp_curr_dir)
            os.chdir(localdir)
            self.walk(d, os.path.join(localdir, d))
        # step back up on the server; without this only one level of
        # recursion works
        self.ftp.cwd('..')

    def uploadFile(self, remotepath='./', localpath='./'):
        """Upload the single local file *localpath* as *remotepath*."""
        print("Upload", localpath, remotepath, os.path.isfile(localpath))
        if not os.path.isfile(localpath):
            return
        print('+++ upload %s to %s' % (localpath, remotepath))
        # context manager: the original never closed this handle
        with open(localpath, 'rb') as fp:
            self.ftp.storbinary('STOR ' + remotepath, fp)

    def upload_dir(self, remotedir='./', localdir='./'):
        """Recursively upload *localdir* into the remote folder *remotedir*.

        :param remotedir: remote folder (created when it cannot be entered)
        :param localdir: local folder
        :return: None
        """
        if not os.path.isdir(localdir):
            return
        print("Upload dir", remotedir, localdir)
        try:
            self.ftp.cwd(remotedir)
        except Exception:
            # could not enter the folder: create it, then enter
            self.ftp.mkd(remotedir)
            self.ftp.cwd(remotedir)
            print("远程文件夹创建成功")
        for file in os.listdir(localdir):
            src = localdir + '/' + file
            print(src)
            if os.path.isfile(src):
                print("is file")
                self.uploadFile(file, src)
            elif os.path.isdir(src):
                try:
                    self.ftp.mkd(file)
                except Exception:
                    sys.stderr.write('the dir is exists %s' % file)
                self.upload_dir(file, src)
        self.ftp.cwd('..')

    def upload_file(self, remotepath: str, localpath: str):
        """Upload the local file *localpath* to the server as *remotepath*.

        Paths without a dot are treated as folders and handed to
        :meth:`upload_dir`.

        :param remotepath: remote path
        :param localpath: local path
        :return: None
        """
        while '//' in remotepath:
            remotepath = remotepath.replace('//', '/')
        while '//' in localpath:
            localpath = localpath.replace('//', '/')
        print(remotepath, localpath)
        if os.path.isdir(remotepath) or len(remotepath.split('.')) == 1:
            # it is a folder
            self.upload_dir(remotepath, localpath)
            return
        bufsize = 1024
        with open(localpath, 'rb') as fp:
            self.ftp.storbinary('STOR ' + remotepath, fp, bufsize)
            self.ftp.set_debuglevel(0)
        print("上传本地文件:", localpath, "\t到远程:", remotepath, "成功")
class ParentChildEvaluate:
    """
    Class to perform intrinsic evaluation of embeddings using the hierarchical relation of parent/child domains

    1) parse ParentChildTreeFile.txt from interpro
    2) for each child of root
        nn = ask embeddings model to give M nearest neighbors
        calculate_precision_atM(child.descendants, nn)
        calculate_recall_atN(child.descendants, nn)
    3) plot histogram of precision and recall

    #Credits: https://medium.com/@m_n_malaeb/recall-and-precision-at-k-for-recommender-systems-618483226c54
    """

    def __init__(self, data_path):
        """
        ParentChildEvaluate class init

        Parameters
        ----------
        data_path : str
            full data path

        Returns
        -------
        None
        """
        print("ParentChildEvaluate")
        self.data_path = data_path
        self.tree = Tree()

    def get_model_name(self):
        """
        Get embedding model name

        Parameters
        ----------

        Returns
        -------
        str
            base name of the model file set by load_emb_model
        """
        return ntpath.basename(self.model_file)

    def load_emb_model(self, model_file, is_model_binary):
        """
        Load embedding model

        Parameters
        ----------
        model_file : str
            model file name
        is_model_binary : bool
            model is saved in binary format (True), otherwise (False)

        Returns
        -------
        None
        """
        self.model_file = model_file
        self.emb_model = KeyedVectors.load_word2vec_format(
            model_file, binary=is_model_binary)

    def parse_parent_child_file(self,
                                parent_child_file_name,
                                out_path,
                                output_file_name,
                                save_parsed_tree=False):
        """
        Parse the parent child file into self.tree

        The depth of an entry is the number of "--" groups before its
        "IPR" id; entries at depth 0 hang directly under the "INTERPRO" root.

        Parameters
        ----------
        parent_child_file_name : str
            parent child file name
        out_path : str
            output data path
        output_file_name : str
            output file name
        save_parsed_tree : bool
            after parsing save parsed tree (True), otherwise (False)

        Returns
        -------
        None
        """
        previous_num_minus_signs = 0
        last_interpro_id = None

        self.tree.create_node("INTERPRO", "INTERPRO")
        current_parent = "INTERPRO"
        with open(parent_child_file_name, 'r') as parent_child_file:
            for line in parent_child_file:
                line = line.strip()
                if not line:
                    # blank lines carry no entry; skip instead of tripping the assert
                    continue
                current_num_minus_signs = line[0:line.find("IPR")].count("--")
                double_colon_split = line.strip("--").split("::")
                interpro_id = double_colon_split[0]
                assert interpro_id[
                    0:
                    3] == "IPR", "AssertionError: {} \n interpro id should start with IPR and has length of 9.".format(
                        interpro_id)
                if current_num_minus_signs == 0:
                    # top-level entry: child of the root
                    current_parent = "INTERPRO"
                elif current_num_minus_signs > previous_num_minus_signs:
                    # one level down: the previous entry becomes the parent
                    current_parent = last_interpro_id
                elif current_num_minus_signs < previous_num_minus_signs:
                    # Going up: climb as many levels as the depth dropped.
                    # The original climbed exactly one level, which attached
                    # the node to the wrong parent when the depth dropped by
                    # two or more.
                    levels_up = previous_num_minus_signs - current_num_minus_signs
                    for _ in range(levels_up):
                        if current_parent == "INTERPRO":
                            break
                        current_parent = self.tree[current_parent].bpointer
                # same depth: parent unchanged
                self.tree.create_node(interpro_id,
                                      interpro_id,
                                      parent=current_parent)
                previous_num_minus_signs = current_num_minus_signs
                last_interpro_id = interpro_id

        if save_parsed_tree:
            self.tree.save2file(
                filename=os.path.join(out_path, output_file_name))

    def get_nn_calculate_precision_recall_atN(self, N, plot_histograms,
                                              save_diagnostics):
        """
        Get nearest domain vector for each domains and calculate recall based on the ground truth (parsed tree)

        Parameters
        ----------
        N : int
            number of nearest domain vector,
            if N==100 then retrieve as many as the children of a domain in the parsed tree
        plot_histograms : bool
            plot histograms for performance metrics (True), otherwise (False)
        save_diagnostics : bool
            save diagnostic plots for domain with low recall

        Returns
        -------
        None
        """
        print("Get NN and calculate precision and recall at {}".format(N))
        recalls_n = []
        precisions_n = []
        interpros_recall0 = []
        interpros_num_children_recall0 = []

        # N == 100 is the sentinel for "retrieve as many neighbours as the
        # domain has descendants"
        retrieve_all_children = N == 100

        for interpro_node in self.tree.children("INTERPRO"):
            all_children = self.tree.subtree(
                interpro_node.identifier).all_nodes()
            assert interpro_node in all_children, "AssertionError: parent {} is not in the set of all children.".format(
                interpro_node.identifier)
            all_children.remove(interpro_node)
            # per-domain cut-off, kept separate so N itself is never mutated
            top_n = len(all_children) if retrieve_all_children else N
            if interpro_node.identifier in self.emb_model:
                nearest_neighbor_ids = set([
                    nn[0] for nn in self.emb_model.most_similar(
                        positive=interpro_node.identifier, topn=top_n)
                ])
            else:
                print("Model does not contain this id.")
                continue
            true_positives = set([child.identifier for child in all_children
                                 ]).intersection(nearest_neighbor_ids)
            assert len(all_children) > 0 and len(
                nearest_neighbor_ids
            ) == top_n, "AssertionError: For parent {} all children should be > 0 and nearest neighbors should be equal to N.".format(
                interpro_node.identifier)
            recall_n = len(true_positives) / len(all_children)
            precision_n = len(true_positives) / len(nearest_neighbor_ids)
            assert 0.0 <= recall_n <= 1.0 and 0.0 <= precision_n <= 1.0, "AssertionError: For parent {} recall or precision is not at (0,1]".format(
                interpro_node.identifier)
            recalls_n.append(recall_n)
            precisions_n.append(precision_n)
            if recall_n == 0.0:
                interpros_recall0.append(interpro_node.identifier)
                interpros_num_children_recall0.append(len(all_children))

        if plot_histograms:
            if retrieve_all_children:
                self.plot_histogram(recalls_n, "Recall", "Recall",
                                    "Number of Interpro domains", "recall")
            else:
                self.plot_histogram(recalls_n, "Recall@{}".format(N),
                                    "Recall", "Number of Interpro domains",
                                    "recall_{}".format(N))
            # NOTE(review): plotted in both modes (N is 100 in the
            # retrieve-all mode, matching the "for printing in title" reset
            # in the original) - confirm against the upstream layout.
            self.plot_histogram(precisions_n, "Precision@{}".format(N),
                                "Precision", "Number of Interpro domains",
                                "precision_{}".format(N))
        if retrieve_all_children:
            if recalls_n:
                avg_recall = sum(recalls_n) / len(recalls_n)
                print("Average recall at 100: {:.3f}".format(avg_recall))
            else:
                # avoid ZeroDivisionError when no domain could be evaluated
                print("Average recall at 100: n/a (no domains evaluated)")
        if save_diagnostics:
            self.save_diagnostics_recall0(interpros_recall0,
                                          interpros_num_children_recall0)

    def save_diagnostics_recall0(self, interpros_recall0,
                                 interpros_num_children_recall0):
        """
        Save diagnostics histogram for domains with recall of 0

        Parameters
        ----------
        interpros_recall0 : list of str
            interpro ids with recall 0
        interpros_num_children_recall0 : list of int
            number of children of each interpro id, found from the parsed tree, with recall 0

        Returns
        -------
        None
        """
        print("Saving diagnostics for intepro domains with recall 0")
        with open(
                os.path.join(
                    self.data_path,
                    self.get_model_name() + "_interpros_recall0" + ".txt"),
                "w") as interpros_recall0_file:
            # write file with names of interpro having recall 0
            interpros_recall0_file.write("\n".join(interpros_recall0))
        # plot histogram of number of children for interpro parents with recall 0
        self.plot_histogram(interpros_num_children_recall0, None,
                            "Number of Intepro domains", "Number of children",
                            "hist")

    def plot_histogram(self, performance_N, title, xlabel, ylabel, out_suffix):
        """
        Plot histogram for performance metric and also for the number of children

        Parameters
        ----------
        performance_N : list of float
            performance metric value per parent domain
        title : str
            histogram title (if not None)
        xlabel : str
            label x
        ylabel : str
            label y
        out_suffix : str
            histogram output file name suffix

        Returns
        -------
        None
        """
        fig = plt.figure()
        plt.hist(performance_N,
                 color='g',
                 align='left',
                 edgecolor='k',
                 alpha=0.8)
        plt.xlabel(xlabel, fontsize=14)
        plt.ylabel(ylabel, fontsize=14)
        if title is not None:
            plt.title(title, fontsize=14)
        plt.xticks(np.arange(0, 1.1, 0.1))
        hist_name = self.get_model_name() + "_" + out_suffix + ".png"
        fig.savefig(os.path.join(self.data_path, hist_name),
                    bbox_inches='tight',
                    dpi=600)
        # release the figure; pyplot otherwise keeps every figure alive
        plt.close(fig)
def tree_answer(input_json):
    """Rank the "solution" posts of a forum thread by the score of their replies.

    The thread is loaded into a tree (root post first, then every post under
    its ``parent_id``). For each post whose category is ``"solution"``, the
    scores of all posts in its subtree (itself included) whose category is
    not ``"other"`` are summed.

    Note: newlines in the root post text are replaced in ``input_json`` itself.

    :param input_json: dict with ``root_post`` -> ``root_post`` (id, text, ...)
        and ``posts`` holding the parallel lists ``ids``, ``text``,
        ``parent_ids``, ``scores`` and ``categories``.
    :return: ``(scores, texts)`` - two parallel lists ordered by descending
        total score.
    """
    root_post = input_json["root_post"]["root_post"]
    columns = input_json["posts"]
    ids = columns["ids"]
    texts = columns["text"]
    parent_ids = columns["parent_ids"]
    scores = columns["scores"]
    categories = columns["categories"]

    # one row per post: id, text, parent id, category, score
    rows = []
    for idx, post_id in enumerate(ids):
        rows.append([
            post_id, texts[idx], parent_ids[idx], categories[idx],
            scores[idx]
        ])

    tree = Tree()
    root_post[1] = root_post[1].replace('\n', ' ')
    for row in rows:
        row[1] = row[1].replace('\n', ' ')

    tree.create_node("root post",
                     root_post[0],
                     data=ForumPost(root_post[1], "none", "none"))
    for post_id, body, parent_id, category, post_score in rows:
        tree.create_node(str(post_score) + ": " + category + ": " +
                         body[:20],
                         post_id,
                         parent=parent_id,
                         data=ForumPost(body, category, post_score))

    # Walk every post from the root. For each solution, add up the scores
    # found in its subtree and record [total, solution text].
    ranked = []
    for node_id in list(tree.expand_tree()):
        candidate = tree[node_id]
        if candidate.data.category != "solution":
            continue
        total = 0
        for member_id in tree.subtree(candidate.identifier).expand_tree():
            member = tree[member_id].data
            if member.category != "other":
                total += member.score
        ranked.append([total, candidate.data.text])

    ranked.sort(key=lambda entry: entry[0], reverse=True)
    return ([entry[0] for entry in ranked], [entry[1] for entry in ranked])
# Demo walkthrough over an existing `tree` (built earlier in the file):
# each section prints a separator headline followed by one view of the tree.
sep = "-" * 20 + "\n"

# Whole tree, siblings ordered by tag in reverse.
print(sep + "Tree of the whole family:")
tree.show(key=lambda x: x.tag, reverse=True)

# Depth-first traversal; tree[node] is indexed with the yielded value.
print(sep + "All family members in DEPTH mode:")
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)

# Same tree with identifiers shown and the "diane" node filtered out.
print(sep + "All family members without Diane sub-family:")
tree.show(idhidden=False, filter=lambda x: x.identifier != "diane")
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print tree[node].tag

# Only the subtree rooted at "diane".
print(sep + "Let me introduce Diane family only:")
sub_t = tree.subtree("diane")
sub_t.show()

# Each value from is_branch("diane") is used as a key into `tree`.
print(sep + "Children of Diane")
for child in tree.is_branch("diane"):
    print(tree[child].tag)

# Build a small three-node tree and paste it under "jill".
print(sep + "OOhh~ new members join Jill's family:")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste("jill", new_tree)
tree.show()

# Headline only; nothing follows it in this part of the file.
print(sep + "They leave after a while:")
def _get_descendants_from_tree(node_identifier: str, tree: Tree) -> Set[str]:
    """Return the identifiers of every node in the subtree rooted at *node_identifier*.

    The result is taken from ``tree.subtree(node_identifier).all_nodes()``,
    so it includes *node_identifier* itself along with its descendants.
    """
    members = tree.subtree(node_identifier).all_nodes()
    return {member.identifier for member in members}