def optimized_structural_coding_tree(G):
    """
    Greedily build a coding tree that estimates the 2-dimensional structural
    entropy (i.e. structural information) of graph G.

    Starting from ``init_codetree(G)``, every round looks only at the children
    of the root and scores two kinds of candidate moves:

    * merge   - a non-leaf child together with a leaf child
                (``merge`` / ``merge_delta``);
    * combine - two leaf children (``combine`` / ``combine_delta``).

    The candidate with the largest strictly positive delta is applied and the
    search repeats. It stops when no candidate has a positive delta, when the
    best merge delta equals the best combine delta, or when the depth of the
    tree exceeds the number of nodes of G.

    param G: graph
    return: the resulting coding tree (the partition of G found by the greedy
            search); the entropy value itself is not returned.
    note: we modify the algorithm, which is defined by Angsheng Li and Yicheng Pan in [1], definition 6, and implemented by Li et al in [2].
    [1] Angsheng Li, Yicheng Pan: Structural Information and Dynamical Complexity of Networks. IEEE Trans. Information Theory 62(6): 3290-3339 (2016)
    [2] Angsheng L , Xianchen Y , Bingxiang X , et al. Decoding topologically associating domains with ultra-low resolution Hi-C data by graph structural entropy. Nature Communications, 2018, 9(1):3265-.
    """
    codetree = init_codetree(G)
    while codetree.depth() <= nx.number_of_nodes(G):
        root_children = codetree.get_node(codetree.root).fpointer

        # Pass 1: best merge of a non-leaf root child with a leaf root child.
        merge_delta_entropy = 0
        for alpha_index, node_id_alpha in enumerate(root_children):
            alpha_is_leaf = codetree.get_node(node_id_alpha).is_leaf()
            for node_id_beta in root_children[alpha_index + 1:]:
                beta_is_leaf = codetree.get_node(node_id_beta).is_leaf()
                if alpha_is_leaf == beta_is_leaf:
                    # A merge needs exactly one leaf and one non-leaf.
                    continue
                # merge()/merge_delta() take the non-leaf node first.
                if alpha_is_leaf:
                    internal_id, leaf_id = node_id_beta, node_id_alpha
                else:
                    internal_id, leaf_id = node_id_alpha, node_id_beta
                mg_T = merge(codetree, internal_id, leaf_id)
                ent = merge_delta(G, codetree, mg_T, internal_id, leaf_id)
                if ent > merge_delta_entropy:
                    merge_delta_entropy = ent
                    merged_tree = Tree(mg_T.subtree(mg_T.root), deep=True)

        # Pass 2: best combination of two leaf root children.
        combine_delta_entropy = 0
        for alpha_index, node_id_alpha in enumerate(root_children):
            if not codetree.get_node(node_id_alpha).is_leaf():
                continue
            for node_id_beta in root_children[alpha_index + 1:]:
                if not codetree.get_node(node_id_beta).is_leaf():
                    continue
                cm_T = combine(codetree, node_id_alpha, node_id_beta)
                ent = combine_delta(G, codetree, cm_T, node_id_alpha,
                                    node_id_beta)
                if ent > combine_delta_entropy:
                    combine_delta_entropy = ent
                    combined_tree = Tree(cm_T.subtree(cm_T.root), deep=True)

        # Apply the strictly better move.
        # NOTE(review): equal positive deltas fall through to `break`, which
        # ends the search although an improving move exists - confirm intent.
        if merge_delta_entropy > 0 and merge_delta_entropy > combine_delta_entropy:
            codetree = Tree(merged_tree.subtree(merged_tree.root), deep=True)
        elif combine_delta_entropy > 0 and combine_delta_entropy > merge_delta_entropy:
            codetree = Tree(combined_tree.subtree(combined_tree.root),
                            deep=True)
        else:
            break

    return codetree
def optimal_graph_coding_tree(G):
    """
    Greedily build a coding tree for graph G, considering sibling pairs at
    every level of the tree.

    Starting from ``init_codetree(G)``, every round walks all nodes of the
    current tree and, for each node, its siblings:

    * merge   - a non-leaf node together with a leaf sibling
                (``merge`` / ``merge_delta``);
    * combine - two leaf siblings (``combine`` / ``combine_delta``), skipped
                when the node has exactly one sibling.

    The candidate with the largest strictly positive delta replaces the
    current tree; the loop ends when neither kind of move is strictly better
    than the other with a positive delta.

    param G: graph
    return: the resulting coding tree
    """
    codetree = init_codetree(G)
    while True:
        # Pass 1: best merge of a non-leaf node with one of its leaf siblings.
        # mode=2 is passed straight to treelib's expand_tree
        # (presumably Tree.WIDTH - confirm against the installed treelib).
        merge_delta_entropy = 0
        for alpha_nid in codetree.expand_tree(mode=2):
            alpha_is_leaf = codetree.get_node(alpha_nid).is_leaf()
            for beta_node in codetree.siblings(alpha_nid):
                if alpha_is_leaf == beta_node.is_leaf():
                    # A merge needs exactly one leaf and one non-leaf.
                    continue
                # merge()/merge_delta() take the non-leaf node first.
                if alpha_is_leaf:
                    internal_nid, leaf_nid = beta_node.identifier, alpha_nid
                else:
                    internal_nid, leaf_nid = alpha_nid, beta_node.identifier
                mg_T = merge(codetree, internal_nid, leaf_nid)
                ent = merge_delta(G, codetree, mg_T, internal_nid, leaf_nid)
                if ent > merge_delta_entropy:
                    merge_delta_entropy = ent
                    merged_tree = Tree(mg_T.subtree(mg_T.root), deep=True)

        # Pass 2: best combination of two leaf siblings.
        # nid=-1 is presumably the root id assigned by init_codetree - confirm.
        combine_delta_entropy = 0
        for alpha_nid in codetree.expand_tree(nid=-1, mode=2):
            if not codetree.get_node(alpha_nid).is_leaf():
                continue
            siblings = codetree.siblings(alpha_nid)
            if len(siblings) == 1:
                # Pairs that are the only two children of their parent are skipped.
                continue
            for beta_node in siblings:
                if not beta_node.is_leaf():
                    continue
                cm_T = combine(codetree, alpha_nid, beta_node.identifier)
                ent = combine_delta(G, codetree, cm_T, alpha_nid,
                                    beta_node.identifier)
                if ent > combine_delta_entropy:
                    combine_delta_entropy = ent
                    combined_tree = Tree(cm_T.subtree(cm_T.root), deep=True)

        # Apply the strictly better move.
        # NOTE(review): equal positive deltas fall through to `break` - confirm intent.
        if merge_delta_entropy > 0 and merge_delta_entropy > combine_delta_entropy:
            codetree = Tree(merged_tree.subtree(merged_tree.root), deep=True)
        elif combine_delta_entropy > 0 and combine_delta_entropy > merge_delta_entropy:
            codetree = Tree(combined_tree.subtree(combined_tree.root),
                            deep=True)
        else:
            break
    return codetree
Example #3
0
 def traverseLogR(self, tree: Tree, action, builder=None):
     """Apply ``action`` to ``tree`` and then, recursively, to each subtree.

     The tree itself is visited first, then the subtree rooted at every id in
     ``tree._rootNode.fpointer``, in that order.

     Args:
         tree: tree to walk; must expose ``_rootNode.fpointer`` and ``subtree``.
         action: callable invoked as ``action(tree)`` or, when ``builder`` is
             given, ``action(tree, builder)``.
         builder: optional extra argument forwarded to ``action``.
     """
     if builder is None:
         action(tree)
     else:
         action(tree, builder)
     for nid in tree._rootNode.fpointer:
         # Recurse through the instance: a bare call would shift every
         # argument by one position (the subtree would be bound to ``self``).
         self.traverseLogR(tree.subtree(nid), action, builder)
Example #4
0
 def get_data(self, json_file_location, file_name, name):
     """Load a pickled tree and return the subtree rooted at ``name`` as JSON.

     Args:
         json_file_location: directory prefix; it is concatenated with
             ``file_name`` as-is, so it must already end with a path separator.
         file_name: name of the pickle file inside that directory.
         name: identifier of the node whose subtree is exported.

     Returns:
         The result of ``subtree(name).to_json(with_data=True)`` on the
         unpickled object.
     """
     directory = os.path.join(json_file_location)
     # NOTE(review): pickle.load can execute arbitrary code; only use this on
     # files written by a trusted source.
     with open(directory + file_name, 'rb') as config_dictionary_file:
         sub_t = pickle.load(config_dictionary_file)
     return sub_t.subtree(name).to_json(with_data=True)
Example #5
0
def create_oa_tree():
    """Build the OA organisation tree from the ``groupinfo`` table.

    Every organisation is first attached to a virtual root (id ``'0000'``)
    and then moved under its real parent when that parent exists in the tree.
    The database connection is closed even if the query or tree construction
    raises.

    Returns:
        The subtree rooted at organisation ``'006953'``.
    """
    # Connect to the database.
    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    db, cursor = connect_db('localhost', 'root', 'yoyoball', 'np020')
    try:
        oa_tree = Tree()

        sql = "SELECT `orgid`, `shortname`, `parentorgid` FROM groupinfo"
        cursor.execute(sql)
        dept_result = cursor.fetchall()
        if dept_result:
            oa_tree.create_node('##oa_root##', '0000')  # create the virtual root first
            # Attach every organisation to the virtual root.
            for orgid, shortname, _parentorgid in dept_result:
                oa_tree.create_node(shortname, orgid, '0000')
            # Fix up the parent relationships. Anything that is not the real
            # root is re-parented. (In OA, organisations such as '001000'
            # already have '0000' as their parent, i.e. the OA database
            # itself has a virtual root, so those need no special handling.)
            # Rows whose parent is not in the tree are left under the virtual root.
            for orgid, _shortname, parentorgid in dept_result:
                if orgid != '0000' and oa_tree.contains(parentorgid):
                    oa_tree.move_node(orgid, parentorgid)
    finally:
        # Disconnect from the database.
        close_db(db)
    return oa_tree.subtree('006953')  # subtree used for the integration-headquarters test
Example #6
0
def create_ding_tree():
    """Build the DingTalk department tree from ``dingding_department_list``.

    Every department is first attached to a virtual root (id ``'0'``) and
    then moved under its real parent when that parent exists in the tree.
    The database connection is closed even if the query or tree construction
    raises.

    Returns:
        The subtree rooted at department ``'1'``.
    """
    # Connect to the database.
    # NOTE(review): credentials are hard-coded; consider moving them to configuration.
    db, cursor = connect_db('localhost', 'root', 'yoyoball', 'dingtalk')
    try:
        ding_tree = Tree()

        sql = "SELECT `id`, `name`, `parentid` FROM dingding_department_list"
        cursor.execute(sql)
        dept_result = cursor.fetchall()
        if dept_result:
            ding_tree.create_node('##ding_root##', '0')  # create the virtual root first
            # Attach every department to the virtual root.
            for dept_id, dept_name, _parent_id in dept_result:
                ding_tree.create_node(dept_name, dept_id, '0')
            # Fix up the parent relationships. Anything that is not the real
            # root is re-parented. (In DingTalk the real root has id '1' and
            # no parent department; the table stores '0' as the parent of
            # department '1'.)
            # Rows whose parent is not in the tree are left under the virtual root.
            for dept_id, _dept_name, parent_id in dept_result:
                if dept_id != '1' and ding_tree.contains(parent_id):
                    ding_tree.move_node(dept_id, parent_id)
    finally:
        # Disconnect from the database.
        close_db(db)
    return ding_tree.subtree('1')
Example #7
0
    def configure_tree_topology(self, root, degree=2, remove=False):
        """Configures the cluster's network topology as a tree.

        The tree consists of the specified root node and the nodes,
        which build the subtrees. The children are incrementally chosen,
        in other words, sequentially as specified in the config file.

        After the tree is built, every node gets one forwarding entry per
        other node: traffic for a descendant goes to the child whose branch
        contains it, traffic for anything else goes to the node's parent
        (the root, having no parent, gets no such entry).

        Arguments:
            root {integer} -- The tree's root node.

        Keyword Arguments:
            degree {integer} -- The maximum number of children (default: {2})
            remove {boolean} -- Remove the configuration (default: {False})
        """

        self.logger.info("Configuring tree topology...")
        tree = Tree()
        root_node = self.topology.get_node(root)
        tree.create_node(root_node.name, root_node.node_id)
        parent_node = root
        for nodex in self.topology.nodes:
            if nodex.node_id == root_node.node_id:
                continue
            if len(tree.children(parent_node)) >= degree:
                # The current parent is full: advance to the next node id,
                # starting from 0 after the root and stepping over the root's
                # own id, which already has its children.
                if parent_node == root and root != 0:
                    parent_node = 0
                elif parent_node + 1 == root:
                    parent_node += 2
                else:
                    parent_node += 1
            tree.create_node(nodex.name, nodex.node_id, parent_node)

        self.logger.info("The following tree will be configured:")
        tree.show()

        for nodex in self.topology.nodes:
            self.logger.debug("%s:", nodex.name)
            # These lookups depend only on nodex, so do them once per node.
            subtree = tree.subtree(nodex.node_id)
            children = tree.children(nodex.node_id)
            parent = tree.parent(nodex.node_id)
            for nodey in self.topology.nodes:
                if nodex.node_id == nodey.node_id:
                    continue
                if subtree.contains(nodey.node_id):
                    # nodey is below nodex: forward via the child that leads to it.
                    for child in children:
                        if (child.identifier == nodey.node_id
                                or tree.is_ancestor(child.identifier,
                                                    nodey.node_id)):
                            nodex.add_forwarding(
                                nodey,
                                self.topology.get_node(child.identifier))
                            break
                elif parent is not None:
                    # nodey is elsewhere in the tree: forward upwards.
                    nodex.add_forwarding(
                        nodey,
                        self.topology.get_node(parent.identifier))

        if not self.testing:
            self.topology.send_forwarding_tables(remove)
Example #8
0
def get_path_to_santa(orbital_tree: Tree) -> list:
    """Return the node ids visited when walking from 'YOU' towards 'SAN'.

    The walk climbs from the parent of 'YOU' until it reaches an ancestor
    whose subtree contains 'SAN'; every ancestor passed on the way is
    recorded. From that ancestor, each root-to-leaf path containing 'SAN'
    is appended without its final element.
    """
    route = []
    node = orbital_tree.parent('YOU')
    found = False

    while not found:
        branch = orbital_tree.subtree(node.identifier)
        if not branch.contains('SAN'):
            # Not a common ancestor yet: record it and climb one level.
            route.append(node.identifier)
            node = orbital_tree.parent(node.identifier)
            continue
        for leaf_path in branch.paths_to_leaves():
            if 'SAN' in leaf_path:
                route.extend(leaf_path[:-1])
                found = True

    return route
 def compare_actual_folder_with_tree(self, root: path, tree: Tree):
     """Assert that the directory layout under ``root`` mirrors ``tree``.

     The tree's root identifier is joined onto ``root`` and must exist on
     disk; the check then recurses into the subtree of every child, using
     the joined path as the new base.
     """
     node_name = tree.root
     expected_path = root.joinpath(node_name)
     print(expected_path)
     self.assertTrue(expected_path.exists(), "The path {} should exist, but doesn't".format(expected_path))
     for child_node in tree.children(node_name):
         self.compare_actual_folder_with_tree(expected_path, tree.subtree(child_node.identifier))
Example #10
0
def trim_excess_root(tree: Tree) -> Tree:
    """Strip leading single-child nodes from the top of ``tree``.

    While the root has exactly one child, that child is detached (its
    parent/bpointer are cleared through ``update_node``) and the subtree
    rooted at it becomes the new tree, e.g. A -> B -> (C, D) becomes
    B -> (C, D). The input tree is returned unchanged when its root does not
    have exactly one child.
    """
    current = tree
    while True:
        branches = current.children(current.root)
        if len(branches) != 1:
            return current
        sole_child_id = branches[0].identifier
        current.update_node(sole_child_id, parent=None, bpointer=None)
        current = current.subtree(sole_child_id)
def our_cost(G: nx.Graph, T: tl.Tree) -> float:
    """Return the cost of tree ``T`` with respect to the edges of ``G``.

    For every edge of ``G`` whose two endpoints are both tags of leaves of
    ``T``, the subtree rooted at the endpoints' lowest common ancestor (as
    given by ``get_lca``) is taken, and the level of each of its leaves
    within that subtree is added to the cost. Edges with an endpoint outside
    the tree are ignored.
    """
    # A set makes the per-edge membership test O(1) instead of scanning a list.
    leaf_tags = {n.tag for n in T.leaves()}
    cost = 0
    for edge in G.edges:
        # only look at edges in this tree.
        if edge[0] in leaf_tags and edge[1] in leaf_tags:
            lca = get_lca(T, edge[0], edge[1])
            subtree = T.subtree(lca)
            cost += sum(
                subtree.level(leaf.identifier) for leaf in subtree.leaves())
    return cost
Example #12
0
def get_descendents(account_id: str, account_tree: Tree) -> list:
    """
    Collect the tags of every node in the subtree rooted at ``account_id``,
    leaving out nodes whose tag equals ``account_id`` itself.

    An empty list is returned when the tree raises ``NodeIDAbsentError``.
    """
    try:
        return [
            node.tag
            for node in account_tree.subtree(account_id).all_nodes()
            if node.tag != account_id
        ]
    except tlexceptions.NodeIDAbsentError:
        return []
Example #13
0
def collapse(t1: tl.Tree, t2: tl.Tree) -> tl.Tree:
    """Return a new tree: a copy of ``t1`` with ``t2``'s top-level branches
    grafted under its root.

    Both inputs are deep-copied and passed through ``reset_ids`` first, so
    the arguments themselves are not modified. The root of ``t2`` is not
    carried over; only the subtrees of its direct children are pasted.
    """
    base_copy = tl.Tree(tree=t1, deep=True)
    donor_copy = tl.Tree(tree=t2, deep=True)

    # Re-number both copies before any pasting happens.
    merged = reset_ids(base_copy)
    donor = reset_ids(donor_copy)

    for branch_root in donor.children(donor.root):
        merged.paste(merged.root, donor.subtree(branch_root.identifier))

    return merged
def create_dummy_download_folder(root: path, tree: Tree) -> path:
    """Materialise ``tree`` on disk below ``root`` and return the created path.

    The tree's root identifier becomes an entry inside ``root``: an empty
    file when the name ends with ".mp3", a directory otherwise. Entries that
    already exist are left alone. Children are created recursively inside
    the new entry.
    """
    entry_name = tree.root
    entry_path = root.joinpath(entry_name)

    if not entry_path.exists():
        print("Creating {}".format(entry_path))
        make_entry = entry_path.touch if entry_name.endswith(".mp3") else entry_path.mkdir
        make_entry()
        # Pause briefly so that consecutive entries get distinct ctimes.
        time.sleep(0.01)

    for child_node in tree.children(entry_name):
        create_dummy_download_folder(entry_path, tree.subtree(child_node.identifier))
    return entry_path
Example #15
0
  def crossOver(individualA, individualB):
    """Create a child individual by swapping a random subtree of A for one of B.

    Both parent trees are deep-copied and their ids regenerated; a random
    node of A is removed (with its subtree) and a deep copy of the subtree
    below a random node of B is pasted in its place. When the removed node
    is A's root, the child is simply the subtree taken from B. The draw is
    repeated until the resulting tree is no deeper than TREE_MAX_DEPTH.

    Returns:
        A new ``Individual`` wrapping the resulting tree.
    """
    tree = None

    # Tree.depth() without an argument is the depth of the whole tree.
    # Passing the root node instead returns that node's level, which is
    # always 0, so the depth limit would never trigger a retry.
    while tree is None or tree.depth() > TREE_MAX_DEPTH:
      treeA = Tree(tree = individualA.tree, deep=True)
      treeB = Tree(tree = individualB.tree, deep=True)
      regenerate_ids(treeA)
      regenerate_ids(treeB)
      removedNode = random.choice(treeA.all_nodes())
      addedNode = random.choice(treeB.all_nodes())

      addedSubtree = Tree(tree = treeB.subtree(addedNode.identifier), deep=True)

      if treeA.root == removedNode.identifier:
        # Replacing the root replaces the whole tree.
        tree = addedSubtree

      else:
        # Look the parent up before removal; afterwards the node is gone.
        parent = treeA.parent(removedNode.identifier)
        treeA.remove_subtree(removedNode.identifier)
        treeA.paste(parent.identifier, addedSubtree)
        tree = treeA

    return Individual(tree)
Example #16
0
class Parser():

    """
    Reads a NAL thesaurus xml file into a dict of node properties and keeps
    a treelib tree of the broader/narrower-term hierarchy alongside it.
    """
    # Numeric ids of the hierarchy roots (taken from the TM queries) and the
    # label given to everything below each of them. Other NAL items might
    # also count as phenotypes or chemicals, but are deliberately not labelled.
    __root2label = {
        322:"Phenotype", 156:"Phenotype", 319:"Phenotype",
        7812:"Chemical", 8:"Chemical", 264:"Chemical", 
        858:"Plant"
    }

    def __init__(self, namespace="usda_nal_thesaurus"):
        self.namespace = namespace
        self.name2id = {}
        self.tree = Tree()
        self.tree.create_node("Root","root")

    def parse(self, xmlFile):
        """
        Parse ``xmlFile`` and return a dict mapping "NAL:<TNR>" ids to the
        property dict of each concept (including "edges" and "labels").
        """
        nodes = {}
        thesaurus = ET.parse(xmlFile).getroot() # THESAURUS node
        # First pass: register every concept directly below the tree root.
        for concept in thesaurus:
            iden = "NAL:"+concept.find("TNR").text # node source ID
            props, name = self.parseNode(concept)
            nodes[iden] = props
            self.tree.create_node(tag=name, identifier=iden, parent='root')
            self.name2id[name] = iden # needed to resolve edges by name
        # Second pass: the xml links parents/children by name (and is sorted
        # alphabetically), so edges can only be resolved once all names are known.
        for nodeID, props in nodes.items():
            props["edges"] = self.parseEdges(nodeID,props)
        # Finally, label every node that sits below one of the known roots.
        for root_number, label in self.__root2label.items():
            labelled = self.tree.subtree("NAL:{}".format(root_number))
            for member in labelled.all_nodes():
                # Duplicate tree nodes carry a ".<n>" suffix; strip it.
                nodes[member.identifier.split(".")[0]]["labels"].add(label)
        return nodes
        
    def parseNode(self,node):
        """
        Extract the properties of one concept element.

        Returns a ``(properties, name)`` tuple. Related terms (RT) and
        subject categories (SC) are not extracted.
        """
        name = node.find("DESCRIPTOR").text
        synonyms = extractElem(node, "UF") # Used for
        parents = extractElem(node, "BT") # Broader term
        children = extractElem(node, "NT") # Narrow term
        properties = {
            "name": name,
            "synonyms": list(synonyms),
            "parents": parents,
            "children": children,
            "labels": {self.namespace},
        }
        return properties, name
    
    def parseEdges(self, nodeID, props):
        """
        Build the edge list of a node and place it in the tree.

        The first parent becomes the node's parent in the tree; for every
        further parent a duplicate tree node "<nodeID>.<n>" is created under
        that parent, because some nodes have multiple parents.
        """
        edges = [("has_child", self.name2id[child_name])
                 for child_name in props["children"]]
        for parent_index, parent_name in enumerate(props["parents"]):
            parentID = self.name2id[parent_name]
            if parent_index == 0:
                self.tree.move_node(nodeID,parentID)
            else:
                self.tree.create_node(tag=props["name"],identifier=nodeID+".{}".format(parent_index),parent=parentID)
            edges.append(("is_a",parentID))
        return edges
Example #17
0
class CoreCommand:
    """Base command.

    Besides running ``main``, a command tracks its own progress. Each
    command owns a node in a progress ``Tree``; commands merged into another
    command share that command's tree, so ``get_progress`` can aggregate the
    values of a command and everything merged into it.
    """

    _events = {}
    _entries = {}
    _native_pool = None
    # Source of unique progress ids, shared by all commands.
    _progress_counter = itertools.count(1)
    # progress id -> progress Tree of every top-level command being tracked.
    _progresses = LRUCache(1000 * 1000)

    def __new__(cls, *args, **kwargs):
        obj = super(CoreCommand, cls).__new__(cls)
        # Refresh the class-level event/entry registries on every instantiation.
        obj._get_commands()
        return obj

    def __init__(self, priority=constants.Priority.Normal):
        self._created_time = arrow.now()
        self.command_id = None
        self._started_time = None
        self._finished_time = None
        self._priority = priority
        self._futures = []
        self._progress_max = None
        self._progress_current = None
        self._progress_text = None
        self._progress_count = None
        self._progress_type = None
        self._progress_tree = None
        self._progress_time = None
        self._progress_timestamp = 0
        self._progress_title = self.__class__.__name__

    def _run(self, *args, **kwargs):
        """
        Run the command with *args and **kwargs.

        Calls ``self.main`` and returns its result.
        """
        log.d("Running command:", self.__class__.__name__)
        r = self.main(*args, **kwargs)
        log.d("Finished running command:", self.__class__.__name__)
        return r

    @classmethod
    def get_all_progress(cls):
        """Return the progress dicts of all tracked commands, newest entry first.

        Trees whose root command has been garbage-collected (dead weakref)
        are skipped.
        """
        ps = []
        for _, t in cls._progresses.items():
            # Node data is a weakref to the command; calling it dereferences.
            x = t.get_node(t.root).data()
            if x:
                ps.insert(0, x.get_progress())
        return ps

    def _add_progress(self, add=True):
        """Make sure this command has a progress id and a progress tree.

        Args:
            add: when true, a newly created tree is also registered in the
                shared ``_progresses`` mapping.
        """
        if not self._progress_count:
            self._progress_count = next(self._progress_counter)

        if self._progress_tree is None and self._progress_count not in self._progresses:
            self._progress_tree = Tree()
            if add:
                self._progresses[self._progress_count] = self._progress_tree
            # The node only holds a weak reference so that the tree does not
            # keep the command alive.
            self._progress_tree.create_node(self._progress_count,
                                            self._progress_count,
                                            data=weakref.ref(self))
            self._progress_timestamp = arrow.now().timestamp

        self._progress_time = arrow.now()

    def merge(self, cmd):
        """
        Merge this command into given command

        ``cmd`` may be None, in which case nothing happens. Returns ``self``.
        """
        assert cmd is None or isinstance(cmd, CoreCommand)
        if cmd:
            self.merge_progress_into(cmd)
        return self

    def merge_progress_into(self, cmd):
        """Attach this command's progress tree below ``cmd``'s progress node.

        Afterwards both commands share ``cmd``'s tree and this command is no
        longer listed as a top-level entry in ``_progresses``.
        """
        assert isinstance(cmd, CoreCommand)
        cmd._add_progress()
        self._add_progress(False)
        cmd._progress_tree.paste(cmd._progress_count, self._progress_tree)

        self._progress_tree = cmd._progress_tree

        if self._progress_count in self._progresses:
            del self._progresses[self._progress_count]

    def _str_progress_tree(self):
        """Return the progress tree rendered as text, one line per node.

        Returns "Tree is empty" when the tree has no nodes.
        """
        self._tree_reader = ""

        def w(l):
            # Accumulate every rendered line; plain assignment would keep
            # only the last one.
            self._tree_reader += l.decode('utf-8') + '\n'

        try:
            # NOTE(review): relies on a private treelib method; it may not
            # exist in every treelib version.
            self._progress_tree._Tree__print_backend(func=w)
        except tree_exceptions.NodeIDAbsentError:
            self._tree_reader = "Tree is empty"
        return self._tree_reader

    def get_progress(self):
        """Return the aggregated progress of this command as a dict.

        ``max`` and ``value`` are summed over this command's node and every
        node below it; ``percent`` is derived from them (-1.0 when no
        maximum is known). Returns None when the command has no progress
        tree (or the tree is empty).
        """
        if not self._progress_tree:
            return None

        log.d("Command", self,
              "progress tree:\n{}".format(self._str_progress_tree()))
        p = {
            'title': self._progress_title,
            'subtitle': '',
            'subtype': None,
            'text': '',
            'value': .0,
            'percent': .0,
            'max': .0,
            'type': self._progress_type,
            'state': self.state.value if hasattr(self, "state") else None,
            'timestamp': self._progress_timestamp
        }

        t = self._progress_tree.subtree(self._progress_count)
        prog_time = self._progress_time
        prog_text = self._progress_text if self._progress_text else ''
        prog_subtitle = ''
        prog_subtype = None
        for _, n in t.nodes.items():
            cmd = n.data()
            if cmd:
                if cmd._progress_max:
                    p['max'] += cmd._progress_max
                if cmd._progress_current:
                    p['value'] += cmd._progress_current

                # Take text/subtitle from a command updated after this one.
                # NOTE(review): prog_time is never advanced here, so the last
                # matching node wins rather than the most recent one - confirm.
                if not prog_time or (cmd._progress_time
                                     and cmd._progress_time > prog_time):
                    prog_text = cmd._progress_text
                    prog_subtitle = cmd._progress_title
                    prog_subtype = cmd._progress_type
        if p['max']:
            p['percent'] = (100 / p['max']) * p['value']
        else:
            p['percent'] = -1.0
        p['text'] = prog_text
        p['subtitle'] = prog_subtitle
        p['subtype'] = prog_subtype
        return p

    def set_progress(self, value=None, text=None, title=None, type_=None):
        """Update the progress fields that are given; None leaves a field as is."""
        assert value is None or isinstance(value, (int, float))
        assert text is None or isinstance(text, str)
        # Validate the title itself (checking `text` here rejected valid
        # titles whenever no text was passed).
        assert title is None or isinstance(title, str)
        self._add_progress()
        if title is not None:
            self._progress_title = title
        if value is not None:
            self._progress_current = value
        if text is not None:
            self._progress_text = text
        if type_ is not None:
            self._progress_type = type_

    def set_max_progress(self, value, add=False):
        """Set the progress maximum, or increase it by ``value`` when ``add`` is true."""
        assert isinstance(value, (int, float))
        self._add_progress()
        if add:
            if self._progress_max is None:
                self._progress_max = 0
            self._progress_max += value
        else:
            self._progress_max = value

    def next_progress(self, add=1, text=None, _from=0):
        """Advance the current progress by ``add``, starting at ``_from`` if unset.

        Also yields to other tasks via ``utils.switch`` with this command's
        priority.
        """
        assert isinstance(add, (int, float))
        if self._progress_current is None:
            self._progress_current = _from
        if text is not None:
            self._progress_text = text
        self._progress_current += add
        utils.switch(self._priority)

    @contextmanager
    def progress(self, max_progress=None, text=None):
        """Context manager that marks progress as complete on normal exit.

        With ``max_progress`` given, the maximum is set on entry and the
        current value is set to it on exit. If the body raises, the final
        update is skipped.
        """
        if max_progress is not None:
            self.set_max_progress(max_progress)
        yield
        if max_progress is not None:
            self.set_progress(max_progress, text)

    def run_native(self, f, *args, **kwargs):
        """Submit ``f`` to the native pool and return a future tracked by this command."""
        f = async_utils.AsyncFuture(
            self, self._native_pool.apply_async(_native_runner(f), args,
                                                kwargs))
        self._futures.append(f)
        return f

    def push(self, msg, scope=None):
        """Push ``msg`` through ``constants.notification`` if one is configured.

        Returns None when no notification object is available.
        """
        if constants.notification:
            return constants.notification.push(msg, scope=scope)
        # TODO: raise error perhaps?

    def kill(self):
        """Kill every future started through ``run_native``."""
        for f in self._futures:
            f.kill()

    def _log_stats(self, d=None):
        """Log timing information; ``d`` is the time of logging, if known."""
        create_delta = self._finished_time - self._created_time
        run_delta = self._finished_time - self._started_time
        log_delta = (d - self._finished_time) if d else None
        log.i(
            "Command - '{}' -".format(self.__class__.__name__),
            "ID({})".format(self.command_id) if self.command_id else '',
            "running time:\n",
            "\t\tCreation delta: {} (time between creation and finish)\n".
            format(create_delta),
            "\t\tRunning delta: {} (time between start and finish)\n".format(
                run_delta),
            "\t\tLog delta: {} (time between finish and this log)\n".format(
                log_delta),
        )

    def __del__(self):
        # The hasattr checks guard against partially initialised instances.
        if hasattr(self, '_progress_count') and hasattr(self, '_progresses'):
            if self._progress_count and self._progress_count in self._progresses:
                del self._progresses[self._progress_count]

    @classmethod
    def _get_commands(cls, self=None):
        """Collect the CommandEvent/CommandEntry attributes declared on the class.

        Each one found in the class ``__dict__`` is bound to the class and
        initialised; the results are stored on ``_entries`` / ``_events``.
        When ``self`` is given it is used in place of ``cls``.

        Returns:
            An ``(entries, events)`` tuple of name -> object dicts.
        """
        if self is not None:
            cls = self
        events = {}
        entries = {}
        for a in cls.__dict__.values():
            if isinstance(a, CommandEvent):
                a.command_cls = cls
                events[a.name] = a
                a._init()

            if isinstance(a, CommandEntry):
                a.command_cls = cls
                entries[a.name] = a
                a._init()
        cls._entries = entries
        cls._events = events
        return entries, events
Example #18
0
    parentId = container["parentId"]
    if containername != "Tenant":
        if tree.contains(parentName):
            if tree.contains(containername) is False:
                tree.create_node(containername,
                                 containername,
                                 parent=parentName)
        else:
            getcontainerbyid = clntapi.get_container_by_id(parentId)
            parent_parentname = getcontainerbyid["parentName"]
            tree.create_node(parentName, parentName, parent=parent_parentname)
            tree.create_node(containername, containername, parent=parentName)
    if containername == targetcontainer:
        targetcontainerkey = container["key"]

sub_t = tree.subtree(containertobemoved)
sub_t.show()
paths_to_leaves = sub_t.paths_to_leaves()

movedcontainers = []

for paths in paths_to_leaves:
    if suffixremove == "n":
        parentname = paths[0] + "_temp"
        for container in paths:
            if container == containertobemoved:
                if container not in movedcontainers:
                    addContainer = clntapi.add_container(
                        container + "_temp", targetcontainer,
                        targetcontainerkey)
                    getcontainerbyname = clntapi.get_container_by_name(
Example #19
0
    def __init__(self):
        """Load the chapter manifest and expand chapter references in the books.

        Manifest entries whose header starts with "BOOK_" are stored as
        books, everything else as chapters. Each is turned into a tree with
        ``create_tree``; then every book node whose tag is a "$ref:<key>"
        reference is replaced by a copy of the referenced chapter tree.
        """
        self.mfest = load_manifest("../chapters.yaml")
        self.books = {}
        self.chaps = {}

        # Split the manifest definitions into books and chapters by header prefix.
        for adef in self.mfest:
            for defheader, defs in adef.items():
                if not defheader.startswith("BOOK_"):
                    self.chaps[defheader] = defs
                else:
                    self.books[defheader] = defs
        #
        # nested dict approach, not working very well
        # (kept disabled inside a string literal)
        '''
        for title, bookchaps in self.books.items():
            print ("BOOK: {title}".format(title=title))
            print ("+" * 80)
            book = {title: bookchaps}
            pprint (book)
            print ("-" * 80)
            pprint (expand_def(book, self.chaps))
            print ("*" * 80)
        #pprint(books)
        '''

        #
        # tree approach, better
        # One tree per chapter and one per book, keyed by title.
        self.treechap = {}
        for title, chap in self.chaps.items():
            self.treechap[title] = create_tree({title: chap})

        self.treebook = {}
        for title, book in self.books.items():
            self.treebook[title] = create_tree({title: book})

        # Expand "$ref:" nodes of every book in place.
        # NOTE(review): the tree is modified (remove_subtree/paste) while
        # expand_tree is still being iterated - statement order matters here.
        for title, tree in self.treebook.items():
            # tree.show()
            for node in tree.expand_tree(mode=Tree.DEPTH):
                # print ("+", node)
                realtag = node
                if type(realtag) is Node:
                    realtag = node.tag
                # Tags may look like "<prefix>|<tag>"; keep the part after the bar.
                if "|" in realtag:
                    realtag = realtag.split("|")[1]
                if realtag.startswith("$ref:"):
                    chapkey = realtag.split("$ref:")[1]
                    # Work on a deep copy so the chapter tree itself stays untouched.
                    newtree = Tree(tree=self.treechap[chapkey], deep=True)
                    # move up its children to replace totally the root
                    subtree = newtree.subtree(
                        newtree.children(newtree.root)[0].tag)
                    newtree = subtree
                    # Carry over the children of the reference node as new
                    # nodes directly under the expanded chapter's root.
                    for anode in tree.children(node):
                        origtag = anode.tag
                        if "|" in origtag:
                            origtag = anode.tag.split("|")[1]
                        # print (origtag)
                        newtree.create_node(timestamp_node(origtag),
                                            origtag,
                                            parent=newtree.root,
                                            data=time())
                    # find parent node of the node to be replaced
                    parent = tree.parent(node)
                    # use the old timestamp data to preserve insertion order
                    newtree.get_node(
                        newtree.root).data = tree.get_node(node).data
                    # remove old node
                    tree.remove_subtree(node)
                    # replace with new expanded node
                    tree.paste(parent.identifier, newtree)
# Download the ImageNet synset hierarchy (XML) and parse it.
with urllib.request.urlopen('http://www.image-net.org/api/xml/structure_released.xml') as response:
    html = response.read()
tree = ElementTree(fromstring(html))
root = tree.getroot()

# Mirror the XML hierarchy in a treelib tree keyed by wnid.
synsetTree = Tree()

# 'fall11' is used as the id of the tree root.
synsetTree.create_node('Entity', 'fall11', data = Confidence(0, 0))
for synset in root.iter('synset'):
  for child in synset:
    # A wnid that is already in the tree is skipped, so only its first
    # occurrence is kept.
    if child.get('wnid') in synsetTree._nodes:
      continue
    synsetTree.create_node(child.get('words'), child.get('wnid'), parent = synset.get('wnid'), data = Confidence(0, 0))

# synsetTree.show()
# Subtree rooted at synset 'n02087122' (presumably a dog-related synset, going by the variable name).
treeDog = synsetTree.subtree('n02087122')

# Load the retrained model graph once at import time.
model_file = "tf_files/retrained_graph.pb"
graph = load_graph(model_file)


def image_label(file_ID, file_suffix, graph):
  """Prepare the inputs for labelling one test image.

  The visible part only runs when the module is executed as a script: it
  records a start time, builds the image path from ``file_ID`` and
  ``file_suffix`` and sets the model, label-file and preprocessing constants.
  """
  if __name__ == "__main__":
    # NOTE(review): time.clock() was removed in Python 3.8; confirm the
    # targeted interpreter version before relying on this call.
    start = time.clock()
    # Image path: tf_files/ImageNet_test/<file_ID>/<file_suffix>
    file_name = "tf_files/ImageNet_test" + '/' + file_ID + '/' + file_suffix
    model_file = "tf_files/retrained_graph.pb"
    label_file = "tf_files/retrained_labels.txt"
    # Preprocessing constants (presumably the network's expected input size
    # and normalisation -- TODO confirm against the model that is loaded).
    input_height = 224
    input_width = 224
    input_mean = 128
    input_std = 128
Example #21
0
# Walk the whole tree depth-first and print each member's tag.
# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the bare `print x` statement, which is a SyntaxError on Python 3.
print("#"*4 + "All family members in DEPTH mode")
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)
print('\n')


# Render the tree while hiding the node whose identifier is 'diane'.
print("#"*4 + "All family members without Diane sub-family")
tree.show(idhidden=False, filter=lambda x: x.identifier != 'diane')
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print(tree[node].tag)
print('\n')


# Extract and display only the subtree rooted at 'diane'.
print("#"*4 + "Let me introduce Diane family only")
sub_t = tree.subtree('diane')
sub_t.show()
print('\n')


# is_branch() yields the identifiers of the direct children of 'diane'.
print("#"*4 + "Children of Diane")
print(tree.is_branch('diane'))
print('\n')


# Build a second tree and graft it under the existing 'jill' node.
print("#"*4 + "OOhh~ new members enter Jill's family")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste('jill', new_tree)
Example #22
0
#     for key, value in dictionary.items():
#         if type(value) is dict:
#             if root:
#                 t = 5
#                 root_time = t
#             else:
#                 print('t', root_time)
#             print(key, value)
#             recursive_items(value, False, root_time)
#         else:
#             print(key, value)
#
# a = {'a': {1: {1: 2, 3: 4}, 2: {5: 6}}}
#
# recursive_items(a, True, 0)

from treelib import Node, Tree

# Build a small family tree from a (tag, identifier, parent) table; the first
# row has no parent and therefore becomes the root.
tree = Tree()
for tag, identifier, parent in (
        ("Harry", "harry", None),
        ("Jane", "jane", "harry"),
        ("Bill", "bill", "harry"),
        ("Diane", "diane", "jane"),
        ("Mary", "mary", "diane"),
        ("Mark", "mark", "jane"),
):
    tree.create_node(tag, identifier, parent=parent)
tree.show()

# Show the subtree rooted at "mark", then the number of the root's children.
sub_tree = tree.subtree("mark")
sub_tree.show()
root_children = tree.children(tree.root)
print(len(root_children))
Example #23
0
# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the bare `print x` statement, which is a SyntaxError on Python 3.
print("#"*4 + "Breakdown of out family")
tree.show()
print('\n')

# Walk the whole tree depth-first and print each member's tag.
print("#"*4 + "All family members in DEPTH mode")
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)
print('\n')

# NOTE(review): this filter compares its argument directly with 'diane'.
# Whether treelib passes the node identifier or the Node object to the filter
# depends on the treelib version -- confirm that Diane is really excluded.
print("#"*4 + "All family members without Diane sub-family")
for node in tree.expand_tree(filter=lambda x: x != 'diane', mode=Tree.DEPTH):
    print(tree[node].tag)
print('\n')

# Extract and display only the subtree rooted at 'diane'.
print("#"*4 + "Let me introduce Diane family only")
sub_t = tree.subtree('diane')
sub_t.show()
print('\n')

# is_branch() yields the identifiers of the direct children of 'diane'.
print("#"*4 + "Children of Diane")
print(tree.is_branch('diane'))
print('\n')

# Build a second tree and graft it under the existing 'jill' node.
print("#"*4 + "OOhh~ new members enter Jill's family")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste('jill', new_tree)
tree.show()
print('\n')
Example #24
0
class Group(ElementWithAttributes):
    """Hierarchical container of groups and data sets, addressed by path keys.

    Children are stored in a treelib ``Tree`` whose node identifiers are
    "/"-separated keys.  When the group is linked to a directory (see
    :meth:`link`), newly assigned elements are also written to disk.
    """

    def __init__(self):
        super(Group, self).__init__()
        self.type = DATA_DIR_TYPES.GROUP

        # Filesystem location this group is linked to; None while unlinked.
        self.path = None
        self.tree = Tree()

    def __getitem__(self, item):
        """Return the element stored under ``item`` or an attribute value.

        If ``item`` is not a node identifier, its last "/"-component is
        looked up as an attribute name on the parent element (the tree root
        when ``item`` contains no "/").

        :param item: "/"-separated key of an element or of an attribute.
        :return: the node's data, or the attribute value.
        :raises KeyError: if ``item`` is neither an element nor an attribute.
        :raises GroupError: if an empty data set has to be loaded but the
            group is not linked to a path.
        """
        if item not in self.tree:
            rsplit = item.rsplit("/", maxsplit=1)
            if len(rsplit) == 1:
                item_0 = self.tree.root
                key = rsplit[0]
            else:
                item_0, key = rsplit
            if item_0 in self.tree:
                node = self.tree[item_0]
                if (isinstance(node.data, ElementWithAttributes)
                        and key in node.data.attrs):
                    return node.data.attrs[
                        key]  # ### RETURN attribute value ###

            raise KeyError(f"{item} is not a valid key")

        node = self.tree[item]

        if isinstance(node.data, Group):
            # rebuild tree with reduced identifiers (the ``item`` prefix is
            # stripped from every identifier of the sub-tree)
            stree = self.tree.subtree(item)
            for n in stree.all_nodes_itr():
                if n.predecessor(stree.identifier) is None:
                    parent = None
                else:
                    parent = n.predecessor(stree.identifier).split(
                        item, maxsplit=1)[1]
                node.data.tree.create_node(n.tag,
                                           n.identifier.split(item,
                                                              maxsplit=1)[1],
                                           parent,
                                           data=n.data)

        elif isinstance(node.data, DataSet):
            # Lazily load the data frame the first time it is requested.
            if node.data.df.empty:
                if self.path is None:
                    raise GroupError(
                        f"{item} is not loaded yet and this element is not linked to a File or Group"
                    )
                node.data.df = pd.read_parquet(self.path / item / DATA_FILE)

        return node.data

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` and, when linked, persist it.

        :param key: "/"-separated key; everything before the last "/" must
            already exist in the tree.
        :param value: a ``Group``, ``DataSet``, ``Raw`` or ``Attribute``.
            ``Raw`` and ``Attribute`` values are accepted but not stored.
        :raises KeyError: if ``key`` already exists or its parent does not.
        :raises ValueError: if ``value`` has an unsupported type.
        """
        if key in self.tree:
            raise KeyError(f"{key} already exists")

        rsplit = key.rsplit("/", maxsplit=1)
        if len(rsplit) == 1:
            item_0 = self.tree.root
            key_1 = rsplit[0]
        else:
            item_0, key_1 = rsplit

        if item_0 is not None and item_0 not in self.tree:
            raise KeyError(f"Parent key {item_0} does not exist")

        # Set once the element directory has been created in this call, so
        # that it is created exactly once and still fails if it pre-exists.
        dir_created = False

        dd_type = None
        if isinstance(value, Group):
            dd_type = value.type
            # Re-create the group's tree with every identifier prefixed by
            # ``key`` so it can be pasted below the new node.
            old_tree = value.tree
            new_tree = Tree()
            for node in old_tree.all_nodes_itr():
                # Same parent lookup as in __getitem__: treelib nodes expose
                # their parent through predecessor(tree_id).
                pred = node.predecessor(old_tree.identifier)
                parent = None if pred is None else key + "/" + pred
                new_tree.create_node(node.tag,
                                     key + "/" + node.identifier,
                                     parent=parent,
                                     data=node.data)
            value.tree = new_tree
            self.tree.create_node(tag=key_1,
                                  identifier=key,
                                  parent=item_0,
                                  data=value)
            self.tree.paste(key, new_tree)

        elif isinstance(value, DataSet):
            dd_type = DATA_DIR_TYPES.DATASET
            self.tree.create_node(tag=key_1,
                                  identifier=key,
                                  parent=item_0,
                                  data=value)
            if self.path is not None:
                # The element directory must exist before the parquet file
                # can be written into it.
                (self.path / key).mkdir()
                dir_created = True
                value.df.to_parquet(self.path / key / DATA_FILE)

        elif isinstance(value, Raw):
            pass
        elif isinstance(value, Attribute):
            pass
        else:
            raise ValueError(f"{value} is not a valid type for DataDir")

        # write ddir and attributes file if self is linked
        if isinstance(value, ElementWithAttributes) and self.path is not None:
            element_dir = self.path / key
            if not dir_created:
                element_dir.mkdir()
            _write_ddir_json(element_dir, dd_type=dd_type)
            # Context manager guarantees the attributes file is flushed and
            # closed even if serialisation fails.
            with (element_dir / ATTRIBUTES_FILE).open("w") as attrs_file:
                json.dump(value.attrs, attrs_file, indent=4)

    def link(self, path):
        """Attach this group to the filesystem location ``path``."""
        self.path = path
Example #25
0
class TreeT(object):
    """Wrapper around a treelib ``Tree`` for converting between bracketed
    (PTB-style) token sequences, a tree with per-leaf heights, and per-leaf
    tag sequences.

    Node payloads are ``TreeData`` objects; this class reads and writes their
    ``leaves``, ``height``, ``word``, ``comb_side`` and ``miss_side`` fields.
    """

    def __init__(self, max_id=0):
        # NOTE: ``max_id`` is accepted but not used by the constructor.
        self.tree = Tree()

    def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None):
        """Recursively build ``self.tree`` from a bracketed token sequence.

        :param line: token sequence starting with ``'('`` and a tag.
        :param max_id: next identifier to use for a non-root tag node.
        :param leaf_id: next identifier to use for a word (leaf) node.
        :param parent_id: identifier of the parent node, None for the root
            call (the root tag node then gets identifier 0).
        :return: tuple ``(tokens consumed, next max_id, next leaf_id)``.

        NOTE(review): tag nodes and word nodes draw identifiers from two
        different counters; presumably the caller chooses ``max_id`` so the
        two ranges never collide -- confirm against the call sites.
        """
        # starts by ['(', 'pos']
        pos_tag = line[1]
        if parent_id is None:
            pos_id = 0
        else:
            pos_id = max_id
            max_id += 1

        self.tree.create_node(pos_tag, pos_id, parent_id, TreeData())

        parent_id = pos_id
        # '(' and the tag have been consumed so far.
        total_offset = 2

        if line[2] != '(':
            # sub-tree is leaf
            # line[0:3] = ['(', 'pos', 'word', ')']
            word_tag = line[2]
            self.tree.create_node(word_tag, leaf_id, parent_id, TreeData())
            return 4, max_id, leaf_id + 1

        line = line[2:]

        # Consume child sub-trees until the closing bracket of this node.
        while line[0] != ')':
            offset, max_id, leaf_id = self.from_ptb_to_tree(
                line, max_id, leaf_id, parent_id)
            total_offset += offset
            line = line[offset:]

        # +1 accounts for the closing ')'.
        return total_offset + 1, max_id, leaf_id

    def add_height(self, tree_dep):
        """Fill ``data.height`` of every leaf and ``data.leaves`` of every node.

        :param tree_dep: mapping from leaf identifier to another identifier
            (presumably the leaf's dependency head -- TODO confirm).
        :return: always True.
        """

        for n in self.tree.all_nodes():
            n.data.leaves = []

        for leaf in self.tree.leaves():
            lid = leaf.identifier
            hid = tree_dep[lid]
            if hid == self.tree.root:
                # The leaf maps to the root id: its height is its full depth
                # and it is registered on every node of its root-to-leaf path.
                self.tree[lid].data.height = self.tree.depth(self.tree[lid])
                for cid in [
                        p for p in self.tree.paths_to_leaves() if lid in p
                ][0]:
                    self.tree[cid].data.leaves += [lid]
            else:
                # Climb from the leaf towards the root, registering the leaf
                # on each visited node, for as long as the ``tree_dep``
                # targets of the other leaves below the current ancestor all
                # lie among that ancestor's own leaves.
                height = -1
                cid = lid
                cond = True
                while cond:
                    self.tree[cid].data.leaves += [lid]
                    height += 1
                    cid = self.tree.parent(cid).identifier
                    cid_leaves = [l.identifier for l in self.tree.leaves(cid)]
                    cid_l_dep = [tree_dep[l] for l in cid_leaves if l != lid]
                    cond = set(cid_l_dep).issubset(set(cid_leaves))
                self.tree[lid].data.height = height

        # Nodes that no leaf registered on: give each one the first leaf of
        # its lowest-height child and raise that leaf's height by one.
        x_nodes = [
            n.identifier for n in self.tree.all_nodes() if n.data.leaves == []
        ]
        for x_node in x_nodes[::-1]:
            min_id = min(self.tree.children(x_node),
                         key=lambda c: c.data.height)
            _lid = min_id.data.leaves[0]
            self.tree[_lid].data.height += 1
            self.tree[x_node].data.leaves += [_lid]

        return True

    def _from_tree_to_ptb(self, nid):
        """Return the bracketed string for the sub-tree rooted at ``nid``.

        Leaves are rendered as `` (tag word)`` using ``data.word``; children
        are emitted in ascending identifier order.
        """
        nid = self.tree.subtree(nid).root
        if self.tree[nid].is_leaf():
            return ' (' + self.tree[nid].tag + ' ' + self.tree[
                nid].data.word + ')'

        res = ' (' + self.tree[nid].tag

        for c_nid in sorted(self.tree.children(nid),
                            key=lambda x: x.identifier):
            res += self._from_tree_to_ptb(c_nid.identifier)

        return res + ')'

    def from_tree_to_ptb(self):
        """Return the bracketed string for the whole tree."""
        return self._from_tree_to_ptb(self.tree.root)

    def from_tag_to_tree(self, tag, word, pos_id=0):
        """Build a tree from one tag sequence and attach ``word`` to it.

        Each element of ``tag`` becomes a node chained below the previous
        one; an optional leading ``CL``/``CR`` marker is stored as
        ``comb_side``.  The remaining entries of an element become children
        whose first character is stored as ``miss_side``.

        :param tag: sequence of tag-node sequences.
        :param word: word assigned to the first leaf with an empty
            ``miss_side``.
        :param pos_id: first node identifier to use.
        :return: the next unused node identifier.
        """
        parent_id = None
        for tag_nodes in tag:
            if tag_nodes[0] in [CL, CR]:
                c_side = tag_nodes[0]
                _tag_nodes = tag_nodes[1:] if len(tag_nodes) > 1 else ['']
            else:
                c_side = ''
                _tag_nodes = tag_nodes
            self.tree.create_node(_tag_nodes[0],
                                  pos_id,
                                  parent=parent_id,
                                  data=TreeData(comb_side=c_side))

            parent_id = pos_id
            pos_id += 1
            for tag_node in _tag_nodes[1:]:
                self.tree.create_node(tag_node[1:],
                                      pos_id,
                                      parent=parent_id,
                                      data=TreeData(miss_side=tag_node[0]))
                pos_id += 1
        for l in self.tree.leaves():
            if l.data.miss_side == '':
                l.data.word = word
                break
        return pos_id

    # NOTE(review): ``memoize`` is defined outside this block; the methods
    # below read the mutable ``self.tree``, so confirm how the cache is keyed
    # and invalidated.
    @memoize
    def is_combine_to(self, side):
        """Return True if the root node's ``comb_side`` equals ``side``."""
        return self.tree[self.tree.root].data.comb_side == side

    @memoize
    def is_combine_right(self):
        """Return True if the root node's ``comb_side`` is ``CR``."""
        return self.is_combine_to(CR)

    @memoize
    def is_combine_left(self):
        """Return True if the root node's ``comb_side`` is ``CL``."""
        return self.is_combine_to(CL)

    @memoize
    def is_complete_tree(self):
        """Return True if no node in the tree has a ``miss_side`` set."""
        return all([n.data.miss_side == '' for n in self.tree.all_nodes()])

    @memoize
    def get_missing_leaves_to(self, miss_val, side):
        """Return identifiers of leaves tagged ``miss_val`` whose
        ``miss_side`` equals ``side``."""
        return [
            l.identifier for l in self.tree.leaves(self.tree.root)
            if l.data.miss_side == side and l.tag == miss_val
        ]

    @memoize
    def get_missing_leaves_left(self, miss_val):
        """Return identifiers of leaves tagged ``miss_val`` with side ``L``."""
        return self.get_missing_leaves_to(miss_val, L)

    @memoize
    def get_missing_leaves_right(self, miss_val):
        """Return identifiers of leaves tagged ``miss_val`` with side ``R``."""
        return self.get_missing_leaves_to(miss_val, R)

    @memoize
    def root_tag(self):
        """Return the tag of the root node."""
        return self.tree[self.tree.root].tag

    @memoize
    def is_no_missing_leaves(self):
        """Return True if no leaf has a ``miss_side`` set."""
        return all(
            [l.data.miss_side == '' for l in self.tree.leaves(self.tree.root)])

    @memoize
    def combine_tree(self, _tree, comb_leaf):
        """Paste ``_tree`` below ``comb_leaf`` and then link past that node.

        :return: ``self``.
        """
        self.tree.paste(comb_leaf, _tree.tree)
        self.tree.link_past_node(comb_leaf)
        return self

    def tree_to_path(self, nid, path):
        """Recursively fill ``path`` with one list per leaf below ``nid``.

        :param nid: identifier of the current node.
        :param path: dict updated in place, keyed by leaf identifier.
        :return: tuple ``(leaf identifier, remaining height)`` of the leaf
            whose path continues through this node.

        NOTE(review): ``ret_leaf_id``/``ret_height`` are only assigned when
        some child reports a height > 0; otherwise the final ``return`` fails
        with an unbound local -- confirm that heights always guarantee this.
        """

        # Stop condition
        if self.tree[nid].is_leaf():
            path[nid] = []
            return nid, self.tree[nid].data.height

        # Recursion
        flag = CR
        for child in self.tree.children(nid):
            cid = child.identifier
            leaf_id, height = self.tree_to_path(cid, path)

            if (height == 0):
                # Reached end of path can add flag
                path[leaf_id].insert(0, flag)
                # path[leaf_id].append(flag)

            if height > 0:
                path[leaf_id].insert(0, nid)
                # only single child will have height>0
                # and its value will be the one that is returned
                # to the parent
                ret_leaf_id, ret_height = leaf_id, height - 1

                # once we reached a height>0, it means that
                # this path includes the parent, and thus flag
                # direction should flip
                flag = CL

        return ret_leaf_id, ret_height

    def path_to_tags(self, path):
        """Convert leaf paths (as built by :meth:`tree_to_path`) to tag lists.

        For each path, an optional leading ``CL``/``CR`` flag is kept, every
        node before the last contributes its tag followed by the tags of its
        off-path children (prefixed with ``R`` or ``L`` depending on whether
        the child's identifier is greater than the next path element), and
        the last node contributes its tag.
        """
        tags = []
        for p in path:
            _res = []
            # Work on a copy: the loop below pops elements off the path.
            _p = copy.copy(p)
            if _p[0] in [CL, CR]:
                _res.append(_p[0])
                _p = _p[1:]
            while _p[:-1]:
                el_p = _p.pop(0)
                _res.append(self.tree[el_p].tag)
                for c in self.tree.children(el_p):
                    if c.identifier != _p[0]:
                        _res.append(R + c.tag if c.identifier > _p[0] else L +
                                    c.tag)
            _res.append(self.tree[_p[0]].tag)
            tags.append(_res)
        return tags

    def path_to_words(self, path):
        """Return the tags of the nodes whose identifiers are in ``path``."""
        return [self.tree[k].tag for k in path]

    def from_tree_to_tag(self):
        """Return ``{'tags': ..., 'words': ...}`` computed from the tree."""
        path = {}
        self.tree_to_path(self.tree.root, path)
        return {
            'tags': self.path_to_tags(path.values()),
            'words': self.path_to_words(path.keys())
        }

    def from_ptb_to_tag(self, line, max_id, depend):
        """Build the tree from ``line``, add heights from ``depend`` and
        return the per-leaf tag lists."""
        self.from_ptb_to_tree(line, max_id)
        self.add_height(depend)
        path = {}
        self.tree_to_path(self.tree.root, path)
        return self.path_to_tags(path.values())
Example #26
0
class model:
    """Enrolment model stored as a tree.

    Tree layout built by :meth:`generate_model`:
    ``model / iteration / config / country / site / patient``.  Node
    identifiers below the iteration level are the "/"-joined keys of their
    ancestors.
    """
    # future
    # 3+3, arms, seasonality
    # 3+3
    # array of [#patients, stop period]
    # when reach #patient apply stop
    # trigger stop timer in config/iteration?
    # config groups
    # could consider config group, to combine configs to give max_patients, i.e. 1 group per arm
    # config
    # consider adding treatment period, screening period to config for cohorts
    # country
    # max_patients per country
    # patients
    # actual patients are projected to enrol and complete?
    # PFS, OS
    # output
    # table with iteration, config, country, site, patient, screened, enrolled, complete
    # option to use beta-pert?
    # multithreading for iterations
    # add option to __add__ models to combine trees?
    def __init__(self,
                 config_objs,
                 num_iterations=1,
                 screening_period=0,
                 treatment_period=0):
        """
        :param config_objs: iterable of configuration objects; each must
            provide ``setup_dict``, ``max_patients``, ``current_timestep`` and
            ``patients_enrolled``.
        :param num_iterations: number of iteration sub-trees to generate.
        :param screening_period: passed to every generated patient.
        :param treatment_period: passed to every generated patient.
        """
        self.config_objs = config_objs
        self.num_iterations = num_iterations
        self.screening_period = screening_period
        self.treatment_period = treatment_period
        self.tree = Tree()

    def generate_model(self):
        """Populate ``self.tree`` from the configuration objects.

        Keys starting with ``'__'`` at site or patient level are not turned
        into nodes.  For every country, sites that are not listed explicitly
        are generated (up to ``num_sites``) from the country's triangular
        screen-rate and setup-time draws.
        """
        # root node
        root_id = 'model'
        self.tree.create_node(root_id, root_id)

        # iterations
        for iteration in range(self.num_iterations):
            iteration_id = str(iteration)
            self.tree.create_node(iteration_id,
                                  iteration_id,
                                  root_id,
                                  data=None)

            # configs -- read from the instance, not from a global name
            for config_obj in self.config_objs:
                for config_key, config_dict in config_obj.setup_dict.items():
                    config_id = '/'.join([iteration_id, config_key])
                    self.tree.create_node(config_key,
                                          config_id,
                                          parent=iteration_id,
                                          data=config(
                                              config_obj.setup_dict,
                                              config_obj.max_patients,
                                              config_obj.current_timestep))

                    # countries: each value is (country_info, country_dict)
                    for country_key, country_value in config_dict.items():
                        country_info = country_value[0]
                        country_dict = country_value[1]
                        country_id = '/'.join([config_id, country_key])
                        (num_sites, screen_rate_low, screen_rate_med,
                         screen_rate_high, setup_time_low, setup_time_med,
                         setup_time_high, screen_fail_rate,
                         drop_out_rate) = country_info
                        self.tree.create_node(
                            country_key,
                            country_id,
                            parent=country_id.rsplit('/', 1)[0],
                            data=country(num_sites, screen_rate_low,
                                         screen_rate_med, screen_rate_high,
                                         setup_time_low, setup_time_med,
                                         setup_time_high, screen_fail_rate,
                                         drop_out_rate))
                        generated_sites = 0

                        # sites: each value is (site_info, site_dict)
                        for site_key, site_value in country_dict.items():
                            site_info = site_value[0]
                            site_dict = site_value[1]
                            site_id = '/'.join([country_id, site_key])
                            screen_rate, setup_time = site_info
                            if not site_key.startswith('__'):
                                self.tree.create_node(site_key,
                                                      site_id,
                                                      parent=country_id,
                                                      data=site(
                                                          setup_time,
                                                          screen_rate))
                                generated_sites += 1

                            # patients: each value is
                            # (screen_dt, enrol_dt, complete_dt)
                            for patient_key, patient_dict in site_dict.items():
                                if patient_key.startswith('__'):
                                    continue
                                patient_id = '/'.join([site_id, patient_key])
                                screen_dt, enrol_dt, complete_dt = patient_dict
                                self.tree.create_node(
                                    patient_key,
                                    patient_id,
                                    parent=site_id,
                                    data=patient(self.screening_period,
                                                 self.treatment_period,
                                                 screen_fail_rate,
                                                 drop_out_rate, screen_dt,
                                                 enrol_dt, complete_dt))
                                if enrol_dt is not None:
                                    config_obj.patients_enrolled += 1

                        # other sites: fill up to num_sites with generated
                        # sites whose setup never precedes the config's
                        # current timestep
                        country_node = self.tree.get_node(country_id)
                        for s in range(num_sites - generated_sites):
                            site_id = '/'.join([country_id, str(s)])
                            sr = country_node.data.triangular_screen_rate()
                            st = country_node.data.triangular_setup_time()
                            st = st if st > config_obj.current_timestep else config_obj.current_timestep
                            self.tree.create_node(str(s),
                                                  site_id,
                                                  parent=country_id,
                                                  data=site(st, sr))

    def show_model(self, iteration=0):
        """Print the whole tree (``iteration == -1``) or one iteration.

        The selected sub-tree is also stored on ``self.sub_tree``.
        """
        if iteration == -1:
            self.tree.show()
        else:
            # need exception for if selection is > num iterations
            self.sub_tree = self.tree.subtree(str(iteration))
            self.sub_tree.show()

    @staticmethod
    def simulate(model_obj, start_dt, max_timestep=1000):
        """Run the screening/enrolment simulation on ``model_obj.tree``.

        :param model_obj: a generated ``model`` instance.
        :param start_dt: start date as a ``'%d-%m-%Y'`` string.
        :param max_timestep: number of timesteps to simulate.

        For every timestep, each active site accumulates its screen rate in a
        buffer and one patient node is created per whole unit in the buffer.
        A config stops being processed on later passes once its enrolled
        count reaches ``max_patients``.
        """
        start_dt = datetime.strptime(start_dt, '%d-%m-%Y')
        tree = model_obj.tree
        for timestep in range(max_timestep):
            for iteration_node in tree.children('model'):
                for config_node in tree.children(iteration_node.identifier):
                    config_data = config_node.data
                    if config_data.current_timestep > timestep:
                        continue
                    if config_data.enrolment_complete:
                        continue  # if reached max patient
                    for country_node in tree.children(config_node.identifier):
                        for site_node in tree.children(
                                country_node.identifier):
                            site_data = site_node.data
                            if site_data.setup_time > timestep:
                                continue
                            site_data.screen_patient_buffer += site_data.screen_rate
                            if site_data.screen_patient_buffer >= 1:
                                for _ in range(
                                        int(site_data.screen_patient_buffer)):
                                    # NOTE(review): the identifier is derived
                                    # from the enrolled count, which does not
                                    # advance for a patient that is not
                                    # enrolled -- confirm this cannot produce
                                    # a duplicate node identifier.
                                    patient_tag = str(
                                        config_data.patients_enrolled)
                                    patient_id = '/'.join(
                                        [site_node.identifier, patient_tag])
                                    tree.create_node(
                                        patient_tag,
                                        patient_id,
                                        parent=site_node.identifier,
                                        data=patient.from_timestep(
                                            timestep, start_dt,
                                            model_obj.screening_period,
                                            model_obj.treatment_period,
                                            country_node.data.screen_fail_rate,
                                            country_node.data.drop_out_rate))
                                    patient_node = tree.get_node(patient_id)
                                    if patient_node.data.enrolled_dt is not None:
                                        config_data.patients_enrolled += 1
                                    if config_data.patients_enrolled >= config_data.max_patients:
                                        config_data.enrolment_complete = True
                                        config_data.max_patient_dt = date.strftime(
                                            start_dt +
                                            timedelta(days=timestep),
                                            '%d-%m-%Y')
                                        # Leaves only this site's screening
                                        # loop for the current timestep.
                                        break
                            # Keep only the fractional part for the next step.
                            site_data.screen_patient_buffer = site_data.screen_patient_buffer % 1
Example #27
0
def _label_pairs(data, word2syn, related, forward, backward, output):
    """Append one relation line per word pair of ``data`` found in ``related``.

    For every pair ``d`` the (singularised) words are mapped to synsets via
    ``word2syn``.  If a synset of the second word is listed under a synset of
    the first word in ``related``, the line ``d[0]\\t<forward>\\t d[1]`` is
    added to ``output``; if the reverse holds, ``backward`` is used instead.
    Lines already present in ``output`` are not added again.

    :return: the pairs (in order, without duplicates) that produced no new
        line.
    """
    unchanged = []
    labelled = set()
    for d in data:
        d0 = d[0]
        d1 = d[1]
        # singular_noun() signals "not a plural" by returning False, hence
        # the explicit comparison instead of a truthiness test.
        singular0 = p.singular_noun(d[0])
        if singular0 != False:  # noqa: E712
            d0 = singular0
        singular1 = p.singular_noun(d[1])
        if singular1 != False:  # noqa: E712
            d1 = singular1
        for i in word2syn[d0]:
            for j in word2syn[d1]:
                if j in related[i]:
                    symbol = forward
                elif i in related[j]:
                    symbol = backward
                else:
                    continue
                line = d[0] + "\t" + symbol + "\t" + d[1]
                if line not in output:
                    output.append(line)
                    labelled.add(d)
        if d not in unchanged and d not in labelled:
            unchanged.append(d)
    return unchanged


def use_hyp(word2syn, output, data):
    """Label word pairs with hypernym (``>``/``<``) and sibling (``|``) relations.

    A tree is built from ``wn_hyp.pl`` (read from ``datapath``), rooted at
    synset ``100001740``.  The first pass labels pairs where one synset is a
    descendant of the other; the second pass labels the remaining pairs whose
    synsets are siblings in the tree.

    :param word2syn: mapping from word to its synset identifiers.
    :param output: list of result lines; extended in place.
    :param data: iterable of hashable word pairs.
    :return: tuple ``(output, pairs that received no label)``.
    """
    dic = Tree()
    dic.create_node("100001740", "100001740")
    # The file is re-read until a full pass adds no node: a line can only be
    # inserted once its parent is already in the tree.
    added = -1
    while added != 0:
        added = 0
        with open(datapath + "wn_hyp.pl", "r") as f:
            for line in f:
                l, r = re.findall(r'\d+', line)
                try:
                    dic.create_node(l, l, parent=r)
                    added += 1
                except Exception:
                    # Deliberate best effort: the node already exists or its
                    # parent is not in the tree yet; retry on a later pass.
                    pass
        print(dic.size())

    # Pass 1: descendant relation (entail[a] lists every node below a).
    entail = defaultdict(list)
    for n in dic.all_nodes():
        for m in dic.subtree(n.tag).all_nodes():
            if m.tag != n.tag:
                entail[n.tag].append(m.tag)
    un_change = _label_pairs(data, word2syn, entail, ">", "<", output)
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output.append("\n")
    del entail  # release the large mapping before building the next one

    # Pass 2: sibling relation, applied to the pairs pass 1 left unlabelled.
    data = un_change
    alter = defaultdict(list)
    for n in dic.all_nodes():
        for m in dic.siblings(n.tag):
            if m.tag != n.tag and n.bpointer != m.tag:
                alter[n.tag].append(m.tag)
    un_change = _label_pairs(data, word2syn, alter, "|", "|", output)
    del alter
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output.append("\n")
    return output, un_change
Example #28
0
class FTPClient(QtWidgets.QLabel):
    """
    FTP connection class.

    Holds an ``FTP`` session (``self.ftp``) and a ``Tree`` (``self.tree``)
    that mirrors remote listings; tree node identifiers are built as
    ``parent + '/' + name`` starting from the node ``'root'``.
    """
    # Emitted with "OK" once startConnect() has connected and built the tree.
    _signal = pyqtSignal(str)

    def __del__(self):
        """
        Close the FTP connection when the object is destroyed.
        :return: None
        """
        print("connect close")
        # self.ftp is only created by ftp_connect(); an instance that never
        # connected must not raise AttributeError while being destroyed.
        ftp = getattr(self, 'ftp', None)
        if ftp is not None:
            ftp.close()

    def __init__(self, host: str, username: str, password: str, port='21'):
        """
        Store the connection parameters; the connection itself is made later
        by startConnect().
        :param host: server host
        :param username: login user name
        :param password: login password
        :param port: server port (converted with int())
        """
        print("init")
        super(FTPClient, self).__init__()
        self.host = host
        self.port = int(port)
        self.username = username
        self.password = password

    @staticmethod
    def _collapse_slashes(path: str) -> str:
        """Collapse every run of '/' in ``path`` into a single '/'."""
        while '//' in path:
            path = path.replace('//', '/')
        return path

    def startConnect(self):
        """
        Build the file tree root, connect to the FTP server, list the current
        remote directory under 'root' and emit "OK" on the signal.
        :return: None
        """
        self.nowDirName = 'root'
        # Create the file tree and its root node.
        self.tree = Tree()
        itemProject = QStandardItem('root')
        itemProject.setIcon(self.getIcon())
        self.tree.create_node('root', 'root', parent=None, data=itemProject)
        # Connect, then create the children of root from the remote listing.
        self.ftp_connect()
        self.createTree(self.ftp.nlst(), 'root')
        self.tree.show()
        # Notify listeners that the connection and the tree are ready.
        self._signal.emit("OK")

    def restartTree(self):
        """
        Discard the current tree, recreate the root node and list the remote
        directory '/' under it again.
        :return: None
        """
        print("刷新树")
        self.tree.remove_subtree('root')
        itemProject = QStandardItem('root')
        itemProject.setIcon(self.getIcon())
        self.tree.create_node('root', 'root', parent=None, data=itemProject)
        self.ftp.cwd('/')
        self.createTree(self.ftp.nlst(), 'root')

    def createTree(self, chiledList: list, parent: str) -> bool:
        """
        Add the entries of a remote directory as children of ``parent``.
        :param chiledList: names of all entries one level below ``parent``
        :param parent: identifier of the parent node
        :return: True when children were created, False when ``parent``
                 already has children (nothing is added in that case)
        """
        # A subtree of depth 0 means the node has not been populated yet.
        if self.tree.subtree(parent).depth() == 0:
            print("叶节点,开始创建文件子树")
        else:
            print("不是叶节点")
            return False
        # Node names are parent + '/' + entry name so that equal file names
        # in different folders still get unique identifiers.
        for i in chiledList:
            full_path = parent + '/' + i
            itemProject = QStandardItem(full_path)
            if '.' not in full_path:
                # No dot anywhere in the path: treated as a folder.
                itemProject.setIcon(self.getIcon())
            else:
                itemProject.setIcon(
                    self.getIcon('.' + full_path.split('.')[-1]))
            node_id = parent + '/' + i.encode('utf-8').decode('utf-8')
            self.tree.create_node(node_id,
                                  node_id,
                                  parent=parent,
                                  data=itemProject)
        return True

    def ftp_connect(self):
        """
        Create the FTP session, connect to the host and log in.
        :return: None
        """
        self.ftp = FTP()
        # Connect to the host.
        self.ftp.connect(self.host, self.port)
        # Log in.
        self.ftp.login(self.username, self.password)
        self.ftp.encoding = 'utf-8'
        print("log in success")

    def getIcon(self, extension='file'):
        """
        Get the icon the operating system associates with an extension.
        :param extension: file extension including the dot; the default
                          'file' selects the icon of "C:\\Users" instead
        :return: the corresponding icon
        """
        provider = QFileIconProvider()
        # The temporary-file object is only configured, never opened here.
        tmpFile = QTemporaryFile('./_aa' + extension)
        tmpFile.setAutoRemove(False)
        icon = provider.icon(QFileInfo('./_aa' + extension))
        if extension == 'file':
            # Default case: use the icon reported for the "C:\\Users" path.
            fileInfo = QFileInfo("C:\\Users")
            fileIcon = QFileIconProvider()
            icon = QIcon(fileIcon.icon(fileInfo))
            return icon
        return icon

    def download_file(self, remotepath: str, localpath: str):
        """
        Download a remote file (or folder) from the FTP server.
        :param remotepath: remote path
        :param localpath: local path
        :return: None
        """
        remotepath = self._collapse_slashes(remotepath)
        localpath = self._collapse_slashes(localpath)
        # A path without any dot is treated as a folder.
        # NOTE(review): os.path.isdir() inspects the local filesystem even
        # though remotepath names a remote entry - confirm this is intended.
        if os.path.isdir(remotepath) or len(remotepath.split('.')) == 1:
            self.download_dir(remotepath, localpath)
            return
        print("是文件")
        bufsize = 1024
        # The context manager closes the local file even if the transfer fails.
        with open(localpath, 'wb') as fp:
            self.ftp.retrbinary('RETR ' + remotepath, fp.write, bufsize)
        self.ftp.set_debuglevel(0)
        print("下载远程文件:", remotepath, "\t到本地路径:", localpath, "成功")

    def download_dir(self, remotedir: str, localdir: str):
        """
        Download a remote folder into a local folder.
        For example download_dir('/test', 'G:/ftpdata/test10') or
        download_dir('test', 'G:/ftpdata/test10'). walk() additionally tries
        to create a local folder named like ``remotedir`` relative to the
        current working directory.
        :param remotedir: remote folder
        :param localdir: local folder
        :return: None
        """
        try:
            # The target folder is appended to the local path by the caller,
            # so it has to be created first.
            os.makedirs(localdir)
        except OSError:
            print("本地文件已经存在,不进行新建")

        print("开始下载文件夹:从 ", remotedir, " 到 ", localdir)
        os.chdir(localdir)
        self.walk(remotedir, localdir)
        print("文件夹下载结束")

    def get_dirs_files(self):
        """
        List the files and folders of the current remote directory.
        :return: (files in the current directory, folders in the current directory)
        """
        dir_res = []
        self.ftp.dir('.', dir_res.append)
        # Listing lines starting with '-' are files, with 'd' are folders;
        # the name is the last of at most nine whitespace-separated fields.
        files = [f.split(None, 8)[-1] for f in dir_res if f.startswith('-')]
        dirs = [f.split(None, 8)[-1] for f in dir_res if f.startswith('d')]
        return (files, dirs)

    def walk(self, remotedir, localdir):
        """
        Recursively download every file below a remote folder.
        Relies on the process working directory: files are written with
        relative names after os.chdir(localdir).
        :param remotedir: remote folder
        :param localdir: local folder
        :return: None
        """
        print('Walking to', remotedir, os.getcwd())
        self.ftp.cwd(remotedir)
        try:
            # Relative names are created inside the current working directory.
            os.mkdir(remotedir)
        except OSError:
            print("创建文件夹失败,文件夹可能已经存在")
        os.chdir(localdir)
        print("now dir", os.getcwd())
        ftp_curr_dir = self.ftp.pwd()

        print("local dir", localdir)
        files, dirs = self.get_dirs_files()
        print("FILES: ", files)
        print("DIRS: ", dirs)
        for f in files:
            print(remotedir, ':', f)
            with open(f, 'wb') as outf:
                self.ftp.retrbinary('RETR %s' % f, outf.write)
        for d in dirs:
            print("Dir:", d, ftp_curr_dir)
            # Return to this level's folder so the child's mkdir lands here.
            os.chdir(localdir)
            self.walk(d, os.path.join(localdir, d))
        # Step back up on the server; without this only one level can be
        # recursed before the remote paths stop matching.
        self.ftp.cwd('..')

    def uploadFile(self, remotepath='./', localpath='./'):
        """
        Upload one local file; does nothing when ``localpath`` is not a file.
        :param remotepath: remote path used in the STOR command
        :param localpath: local file to send
        :return: None
        """
        print("Upload", localpath, remotepath, os.path.isfile(localpath))
        if not os.path.isfile(localpath):
            return
        print('+++ upload %s to %s' % (localpath, remotepath))
        # Close the local file once the transfer finished or failed.
        with open(localpath, 'rb') as fp:
            self.ftp.storbinary('STOR ' + remotepath, fp)

    def upload_dir(self, remotedir='./', localdir='./'):
        '''
        Recursively upload a local folder; does nothing when ``localdir`` is
        not a directory.
        :param remotedir: remote folder (created when cwd into it fails)
        :param localdir: local folder
        :return: None
        '''
        if not os.path.isdir(localdir):
            return
        print("Upload dir", remotedir, localdir)
        try:
            self.ftp.cwd(remotedir)
        except Exception:
            # Could not enter the folder: create it, then enter it.
            self.ftp.mkd(remotedir)
            self.ftp.cwd(remotedir)
            print("远程文件夹创建成功")
        for file in os.listdir(localdir):
            src = localdir + '/' + file
            print(src)
            if os.path.isfile(src):
                print("is file")
                self.uploadFile(file, src)
            elif os.path.isdir(src):
                try:
                    self.ftp.mkd(file)
                except Exception:
                    # Best effort: the folder most likely exists already.
                    sys.stderr.write('the dir is exists %s' % file)
                self.upload_dir(file, src)
        self.ftp.cwd('..')

    def upload_file(self, remotepath: str, localpath: str):
        """
        Upload a local file (or folder) to the server.
        :param remotepath: remote path
        :param localpath: local path
        :return: None
        """
        remotepath = self._collapse_slashes(remotepath)
        localpath = self._collapse_slashes(localpath)
        print(remotepath, localpath)
        # A path without any dot is treated as a folder.
        # NOTE(review): os.path.isdir() inspects the local filesystem even
        # though remotepath names a remote entry - confirm this is intended.
        if os.path.isdir(remotepath) or len(remotepath.split('.')) == 1:
            self.upload_dir(remotepath, localpath)
            return

        bufsize = 1024
        # The context manager closes the local file even if the transfer fails.
        with open(localpath, 'rb') as fp:
            self.ftp.storbinary('STOR ' + remotepath, fp, bufsize)
        self.ftp.set_debuglevel(0)
        print("上传本地文件:", localpath, "\t到远程:", remotepath, "成功")
Example #29
0
class ParentChildEvaluate:
    """
    Class to perform intrinsic evaluation of embeddings using the hierarchical
    relation of parent/child domains.

    1) parse the parent/child tree file from interpro
    2) for each child of root
           nn = ask embeddings model to give N nearest neighbors
           compute precision and recall of nn against the child's descendants
    3) plot histogram of precision and recall

    #Credits: https://medium.com/@m_n_malaeb/recall-and-precision-at-k-for-recommender-systems-618483226c54
    """
    def __init__(self, data_path):
        """
        ParentChildEvaluate class init

        Parameters
        ----------
        data_path : str
            full data path; histograms and diagnostics are written there

        Returns
        -------
        None
        """
        print("ParentChildEvaluate")
        self.data_path = data_path
        self.tree = Tree()

    def get_model_name(self):
        """
        Get embedding model name

        Parameters
        ----------

        Returns
        -------
        str
            base name of the model file given to load_emb_model
        """
        return ntpath.basename(self.model_file)

    def load_emb_model(self, model_file, is_model_binary):
        """
        Load embedding model

        Parameters
        ----------
        model_file : str
            model file name
        is_model_binary : bool
            model is saved in binary format (True), otherwise (False)

        Returns
        -------
        None
        """
        self.model_file = model_file
        self.emb_model = KeyedVectors.load_word2vec_format(
            model_file, binary=is_model_binary)

    @staticmethod
    def _iter_parent_child_pairs(lines):
        """
        Yield (interpro_id, parent_id) for every non-blank line.

        The depth of an entry is the number of "--" pairs in front of its
        "IPR" id. The parent is the closest preceding entry with a smaller
        depth, or "INTERPRO" when there is none; this also covers lines whose
        depth drops by more than one level at once.

        Parameters
        ----------
        lines : iterable of str
            lines of the parent child file

        Raises
        ------
        AssertionError
            when an id does not start with "IPR"
        """
        # (depth, id) chain from the top-level entry down to the last entry.
        open_ancestors = []
        for raw_line in lines:
            line = raw_line.strip()
            if not line:
                continue  # tolerate blank lines, e.g. at the end of the file
            depth = line[0:line.find("IPR")].count("--")
            interpro_id = line.strip("--").split("::")[0]
            assert interpro_id[
                0:
                3] == "IPR", "AssertionError: {} \n interpro id should start with IPR and has length of 9.".format(
                    interpro_id)
            # Drop every entry that is not strictly above the current one.
            while open_ancestors and open_ancestors[-1][0] >= depth:
                open_ancestors.pop()
            parent_id = open_ancestors[-1][1] if open_ancestors else "INTERPRO"
            yield interpro_id, parent_id
            open_ancestors.append((depth, interpro_id))

    def parse_parent_child_file(self,
                                parent_child_file_name,
                                out_path,
                                output_file_name,
                                save_parsed_tree=False):
        """
        Parse the parent child file into self.tree under the root "INTERPRO"

        Parameters
        ----------
        parent_child_file_name : str
            parent child file name
        out_path : str
            output data path
        output_file_name : str
            output file name
        save_parsed_tree : bool
            after parsing save parsed tree (True), otherwise (False)

        Returns
        -------
        None
        """
        self.tree.create_node("INTERPRO", "INTERPRO")
        with open(parent_child_file_name, 'r') as parent_child_file:
            for interpro_id, parent_id in self._iter_parent_child_pairs(
                    parent_child_file):
                self.tree.create_node(interpro_id,
                                      interpro_id,
                                      parent=parent_id)

        if save_parsed_tree:
            self.tree.save2file(
                filename=os.path.join(out_path, output_file_name))

    def get_nn_calculate_precision_recall_atN(self, N, plot_histograms,
                                              save_diagnostics):
        """
        Get nearest domain vector for each domains and calculate recall based on the ground truth (parsed tree)

        Parameters
        ----------
        N : int
            number of nearest domain vector,
            if N==100 then retrieve as many as the children of a domain in the parsed tree
        plot_histograms : bool
            plot histograms for performance metrics (True), otherwise (False)
        save_diagnostics : bool
            save diagnostic plots for domain with low recall

        Returns
        -------
        None
        """
        print("Get NN and calculate precision and recall at {}".format(N))
        recalls_n = []
        precisions_n = []
        interpros_recall0 = []
        interpros_num_children_recall0 = []

        # N == 100 is the sentinel for "as many neighbors as descendants".
        retrieve_all_children = N == 100

        for interpro_node in self.tree.children("INTERPRO"):
            all_children = self.tree.subtree(
                interpro_node.identifier).all_nodes()
            assert interpro_node in all_children, "AssertionError: parent {} is not in the set of all children.".format(
                interpro_node.identifier)
            # The subtree contains its own root; keep only the descendants.
            all_children.remove(interpro_node)
            # Per-domain neighbor count; N itself stays untouched so it can
            # still be used for titles and file names after the loop.
            topn = len(all_children) if retrieve_all_children else N
            if interpro_node.identifier not in self.emb_model:
                print("Model does not contain this id.")
                continue
            nearest_neighbor_ids = {
                nn[0] for nn in self.emb_model.most_similar(
                    positive=interpro_node.identifier, topn=topn)
            }
            true_positives = {child.identifier for child in all_children
                              }.intersection(nearest_neighbor_ids)
            assert len(all_children) > 0 and len(
                nearest_neighbor_ids
            ) == topn, "AssertionError: For parent {} all children should be > 0 and nearest neighbors should be equal to N.".format(
                interpro_node.identifier)
            recall_n = len(true_positives) / len(all_children)
            precision_n = len(true_positives) / len(nearest_neighbor_ids)
            assert 0.0 <= recall_n <= 1.0 and 0.0 <= precision_n <= 1.0, "AssertionError: For parent {} recall or precision is not at (0,1]".format(
                interpro_node.identifier)
            recalls_n.append(recall_n)
            precisions_n.append(precision_n)
            if recall_n == 0.0:
                interpros_recall0.append(interpro_node.identifier)
                interpros_num_children_recall0.append(len(all_children))
        if plot_histograms:
            if retrieve_all_children:
                self.plot_histogram(recalls_n, "Recall", "Recall",
                                    "Number of Interpro domains", "recall")
            else:
                self.plot_histogram(recalls_n, "Recall@{}".format(N), "Recall",
                                    "Number of Interpro domains",
                                    "recall_{}".format(N))
                self.plot_histogram(precisions_n, "Precision@{}".format(N),
                                    "Precision", "Number of Interpro domains",
                                    "precision_{}".format(N))
        if retrieve_all_children:
            if recalls_n:
                avg_recall = sum(recalls_n) / len(recalls_n)
                print("Average recall at 100: {:.3f}".format(avg_recall))
            else:
                # Every domain was skipped; avoid dividing by zero.
                print("Average recall at 100: no domain could be evaluated")
        if save_diagnostics:
            self.save_diagnostics_recall0(interpros_recall0,
                                          interpros_num_children_recall0)

    def save_diagnostics_recall0(self, interpros_recall0,
                                 interpros_num_children_recall0):
        """
        Save diagnostics histogram for domains with recall of 0

        Parameters
        ----------
        interpros_recall0 : list of str
            interpro ids with recall 0
        interpros_num_children_recall0 : list of int
            number of children of each interpro id, found from the parsed tree, with recall 0
        Returns
        -------
        None
        """
        print("Saving diagnostics for intepro domains with recall 0")
        with open(
                os.path.join(
                    self.data_path,
                    self.get_model_name() + "_interpros_recall0" + ".txt"),
                "w") as interpros_recall0_file:
            # write file with names of interpro having recall 0
            interpros_recall0_file.write("\n".join(interpros_recall0))
        # Histogram of child counts: the x axis carries the number of
        # children, the y axis the number of domains, and the values are
        # counts, so the fixed 0..1 ticks are switched off.
        self.plot_histogram(interpros_num_children_recall0,
                            None,
                            "Number of children",
                            "Number of Intepro domains",
                            "hist",
                            unit_interval_ticks=False)

    def plot_histogram(self,
                       performance_N,
                       title,
                       xlabel,
                       ylabel,
                       out_suffix,
                       unit_interval_ticks=True):
        """
        Plot histogram for performance metric and also for the number of children

        Parameters
        ----------
        performance_N : list of float
            performance metric value per parent domain
        title : str
            histogram title (if not None)
        xlabel : str
            label x
        ylabel : str
            label y
        out_suffix : str
            histogram output file name suffix
        unit_interval_ticks : bool
            place x ticks at 0, 0.1, ..., 1.0 (True, suited to precision and
            recall), otherwise keep matplotlib's automatic ticks (False)

        Returns
        -------
        None
        """
        fig = plt.figure()
        plt.hist(performance_N,
                 color='g',
                 align='left',
                 edgecolor='k',
                 alpha=0.8)
        plt.xlabel(xlabel, fontsize=14)
        plt.ylabel(ylabel, fontsize=14)
        if title is not None:
            plt.title(title, fontsize=14)
        if unit_interval_ticks:
            plt.xticks(np.arange(0, 1.1, 0.1))
        hist_name = self.get_model_name() + "_" + out_suffix + ".png"
        fig.savefig(os.path.join(self.data_path, hist_name),
                    bbox_inches='tight',
                    dpi=600)
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)
Example #30
0
def tree_answer(input_json):
    """
    Rank the "solution" posts of a thread by the summed score of their subtree.

    The thread is loaded into a Tree: the root post becomes the root node and
    every other post is attached below its parent id. For each node whose
    category is "solution", the scores of all nodes in its subtree whose
    category is not "other" are added up.

    Newlines in the root post text are replaced in place, so the list inside
    ``input_json`` is modified.

    :param input_json: dict with "root_post" -> "root_post" (id, text, ...)
        and "posts" holding the parallel lists "ids", "text", "parent_ids",
        "scores" and "categories"
    :return: (scores, texts) of the solution posts, highest score first
    """
    root_post = input_json["root_post"]["root_post"]
    columns = input_json["posts"]
    ids = columns["ids"]
    bodies = columns["text"]
    parent_ids = columns["parent_ids"]
    scores = columns["scores"]
    categories = columns["categories"]

    # One row per post: id, text, parent id, category, score.
    rows = [[ids[k], bodies[k], parent_ids[k], categories[k], scores[k]]
            for k in range(len(ids))]

    tree = Tree()

    # Flatten the texts to a single line each.
    root_post[1] = root_post[1].replace('\n', ' ')
    for row in rows:
        row[1] = row[1].replace('\n', ' ')

    tree.create_node("root post",
                     root_post[0],
                     data=ForumPost(root_post[1], "none", "none"))

    for post_id, body, parent_id, category, score in rows:
        tree.create_node(str(score) + ": " + category + ": " + body[:20],
                         post_id,
                         parent=parent_id,
                         data=ForumPost(body, category, score))

    # Visit every node; each solution contributes [subtree score, text].
    ranked = []
    for node_id in tree.expand_tree():
        candidate = tree[node_id]
        if candidate.data.category != "solution":
            continue
        branch = tree.subtree(candidate.identifier)
        total = 0
        for member_id in branch.expand_tree():
            payload = tree[member_id].data
            if payload.category != "other":
                total += payload.score
        ranked.append([total, candidate.data.text])

    ranked.sort(key=lambda entry: entry[0], reverse=True)
    return ([entry[0] for entry in ranked], [entry[1] for entry in ranked])
Example #31
0
# Demo script: prints several views of an existing `tree` (built outside this
# excerpt), each introduced by a separator line followed by a caption.
sep = "-" * 20 + "\n"

# Whole tree, siblings ordered by tag in reverse.
print(sep + "Tree of the whole family:")
tree.show(key=lambda x: x.tag, reverse=True)

# Depth-first traversal; expand_tree yields node ids, so look each node up.
print(sep + "All family members in DEPTH mode:")
for node in tree.expand_tree(mode=Tree.DEPTH):
    print(tree[node].tag)

# Same tree with the node "diane" filtered out; ids are shown.
print(sep + "All family members without Diane sub-family:")
tree.show(idhidden=False, filter=lambda x: x.identifier != "diane")
# Alternative kept from the original (Python 2 print syntax):
# for node in tree.expand_tree(filter=lambda x: x.identifier != 'diane', mode=Tree.DEPTH):
#     print tree[node].tag

# Subtree rooted at "diane".
print(sep + "Let me introduce Diane family only:")
sub_t = tree.subtree("diane")
sub_t.show()

# is_branch results are used as node ids to look up each child of "diane".
print(sep + "Children of Diane")
for child in tree.is_branch("diane"):
    print(tree[child].tag)

# Build a separate three-node tree and paste it under the node "jill".
print(sep + "OOhh~ new members join Jill's family:")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste("jill", new_tree)
tree.show()

# Caption only; the statements that follow it are outside this excerpt.
print(sep + "They leave after a while:")
Example #32
0
 def _get_descendants_from_tree(node_identifier: str,
                                tree: Tree) -> Set[str]:
     """Return the identifiers of all nodes in the subtree at node_identifier.

     Every node reported by the subtree's ``all_nodes()`` is included.
     NOTE(review): that presumably includes ``node_identifier`` itself, not
     only its descendants - confirm against callers.
     """
     branch_nodes = tree.subtree(node_identifier).all_nodes()
     return {member.identifier for member in branch_nodes}