Example #1
0
def draw_tree(recipe_inst):
    """Build, print and return an ete3 tree describing a recipe.

    ``recipe_inst`` is a list of step dictionaries, for example::

        from ete3 import Tree
        recipe_inst = [
            {'word': 'heated', 'ingredient': ['rice', 'banana', 'cookie', 'dishes']},
            {'word': 'boil', 'ingredient': ['apple', 'banana', 'cookie', 'dish']},
            {'word': 'rince', 'ingredient': ['apple', 'banana', 'cookie', 'dish']},
        ]

    Every step that has at least one ingredient becomes an ``action`` node
    with one ``ingredient`` child per ingredient, and is attached beneath the
    previously attached action (the first one hangs off an unnamed root).
    Steps whose ingredient list is empty are not added to the tree.

    The ASCII rendering of the tree is printed before the root is returned.
    """
    # Ingredients are deliberately left unsorted: sorting does not improve
    # the tree edit distance.
    root = Tree()
    attach_point = root
    for step in recipe_inst:
        action = Tree(name=step['word'])
        action.add_feature('type', 'action')
        ingredients = step['ingredient']
        if not ingredients:
            # Actions without ingredients are dropped.
            continue
        for item in ingredients:
            leaf = action.get_tree_root().add_child(name=item)
            leaf.add_feature('type', 'ingredient')
        # Chain the actions: the next action becomes a child of this one.
        attach_point = attach_point.add_child(action)
    print(root.get_ascii(show_internal=True))
    return root
Example #2
0
 def species_tree(self, taxid):
     """Recursively build the tree of taxa rooted at ``taxid``.

     The node for ``taxid`` is named after the taxon's id and annotated with
     its ``spname`` and ``rank``; one subtree is attached per entry of the
     taxon's ``children``.

     Args:
         taxid: identifier passed to ``self.taxa`` to look up the taxon.

     Returns:
         The root ``Tree`` node of the constructed subtree.
     """
     sn = self.taxa(taxid)
     tree = Tree(
         name=sn.taxid)  # PhyloTree? Has annotate_ncbi_taxa() method.
     # add_features() is the keyword-argument form; add_feature() takes a
     # positional (name, value) pair and rejects these keywords.
     tree.add_features(spname=sn.spname, rank=sn.rank)
     for child_taxid in sn.children:
         # add_child() also sets the child's ``up`` pointer, which a raw
         # ``children.extend`` would leave unset.
         tree.add_child(self.species_tree(child_taxid))
     return tree
Example #3
0
def createNode():
    """Return a new domain node with its required fields pre-populated.

    The node is named 'placeholder', has zero branch length, and carries the
    features ``pos`` (0) and ``event`` ('SPECIATION').
    """
    placeholder = Tree()
    placeholder.name = 'placeholder'
    placeholder.dist = 0
    for feature, value in (('pos', 0), ('event', 'SPECIATION')):
        placeholder.add_feature(feature, value)
    return placeholder
def ASR_parser(args):
    """Turn an IQ-TREE result into a rendered, pickled collapsed forest.

    Reads the newick tree in ``args.tree``, attaches the sequences and
    abundances via ``map_asr_to_tree``, reroots on ``args.naive``, recomputes
    branch lengths as Hamming distances and then writes
    ``args.outbase + '.svg'``, ``args.outbase + '.tree'`` and
    ``args.outbase + '.p'``.

    Args:
        args: namespace providing ``tree``, ``counts``, ``asr_seq``,
            ``leaf_seq``, ``naive``, ``name``, ``colormap``, ``idmap`` and
            ``outbase``.

    Raises:
        TreeFileParsingError: if the input tree cannot be parsed.
    """
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    from GCutils import CollapsedForest, CollapsedTree, hamming_distance

    try:
        tree = Tree(args.tree, format=1)
    except Exception as e:
        print(e)
        raise TreeFileParsingError('Could not read the input tree. Is this really newick format?')

    # One "<name>,<count>" record per line; blank lines are ignored.
    counts = {}
    with open(args.counts) as counts_fh:
        for line in counts_fh:
            if not line.strip():
                continue
            fields = line.split(',')
            counts[fields[0]] = int(fields[1])

    tree.add_feature('frequency', 0)       # Placeholder will be deleted when rerooting
    tree.add_feature('sequence', 'DUMMY')  # Placeholder will be deleted when rerooting
    tree = map_asr_to_tree(args.asr_seq, args.leaf_seq, tree, args.naive, counts)

    # Reroot to make the naive sequence the real root instead of just an outgroup:
    tree = reroot_tree(tree, pattern=args.naive)

    # Recompute branch lengths as hamming distances:
    tree.dist = 0  # No branch above root
    for node in tree.iter_descendants():
        node.dist = hamming_distance(node.sequence, node.up.sequence)

    iqtree_tree = CollapsedTree(tree=tree, name=args.name)
    # Add colors:
    if args.colormap is not None:
        # NOTE: pickle can execute arbitrary code while loading; these files
        # must come from a trusted source.
        with open(args.colormap, 'rb') as fh:
            colormap = pickle.load(fh)
        with open(args.idmap, 'rb') as fh:
            id_map = pickle.load(fh)
        # Reverse the id_map:
        id_map = {cs: seq_id for seq_id, cell_ids in id_map.items() for cs in cell_ids}
        # Expand the colormap and map to sequence ids:
        colormap_seqid = dict()
        for key, color in colormap.items():
            if isinstance(key, str):
                # A single cell id. An unknown id is skipped rather than
                # being iterated character by character.
                if key in id_map:
                    colormap_seqid[id_map[key]] = color
            else:
                # A collection of cell ids sharing one color.
                for cell_id in key:
                    if cell_id in id_map:
                        colormap_seqid[id_map[cell_id]] = color
        colormap = colormap_seqid
    else:
        colormap = None
    iqtree_tree.render(args.outbase + '.svg', colormap=colormap)
    iqtree_forest = CollapsedForest(forest=[iqtree_tree], name=args.name)
    # Dump tree as newick:
    iqtree_forest.write_random_tree(args.outbase + '.tree')
    print('number of trees with integer branch lengths:', iqtree_forest.n_trees)

    with open(args.outbase + '.p', 'wb') as f:
        pickle.dump(iqtree_forest, f)

    print('Done parsing IQ-TREE tree')
Example #5
0
def ASR_parser(args):
    """Turn an IgPhyML result into a rendered, pickled collapsed forest.

    Reads the newick tree in ``args.tree``, attaches sequences and abundances
    via ``map_asr_to_tree``, reroots it, recomputes branch lengths as Hamming
    distances and then writes ``args.outbase + '.svg'`` and
    ``args.outbase + '.p'``.

    Args:
        args: namespace providing ``tree``, ``counts``, ``asr_seq``,
            ``naive`` and ``outbase``.

    Raises:
        TreeFileParsingError: if the input tree cannot be parsed.
    """
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    from gctree import CollapsedForest, CollapsedTree, hamming_distance

    try:
        tree = Tree(args.tree)
    except Exception:
        # Only real parsing failures are translated; KeyboardInterrupt and
        # SystemExit propagate unchanged.
        raise TreeFileParsingError(
            'Could not read the input tree. Is this really newick format?')

    # One "<name>,<count>" record per line; blank lines are ignored.
    counts = {}
    with open(args.counts) as counts_fh:
        for line in counts_fh:
            if not line.strip():
                continue
            fields = line.split(',')
            counts[fields[0]] = int(fields[1])

    tree.add_feature('frequency',
                     0)  # Placeholder will be deleted when rerooting
    tree.add_feature('sequence',
                     'DUMMY')  # Placeholder will be deleted when rerooting
    tree = map_asr_to_tree(args.asr_seq, tree, args.naive, counts)

    # Reroot to make the naive sequence the real root instead of just an outgroup:
    tree = reroot_tree(tree)

    # Recompute branch lengths as hamming distances:
    tree.dist = 0  # No branch above root
    for node in tree.iter_descendants():
        node.dist = hamming_distance(node.sequence, node.up.sequence)

    igphyml_tree = CollapsedTree(tree=tree)
    igphyml_tree.render(args.outbase + '.svg')
    igphyml_forest = CollapsedForest(forest=[igphyml_tree])
    print('number of trees with integer branch lengths:',
          igphyml_forest.n_trees)

    # check for unifurcations at root
    unifurcations = sum(
        collapsed.tree.frequency == 0 and len(collapsed.tree.children) == 1
        for collapsed in igphyml_forest.forest)
    if unifurcations:
        print(
            'WARNING: {} trees exhibit unifurcation from root, which is not possible under current model. Such nodes will be ommitted from likelihood calculation'
            .format(unifurcations))

    with open(args.outbase + '.p', 'wb') as f:
        pickle.dump(igphyml_forest, f)

    print('Done parsing IgPhyML tree')
Example #6
0
def generateIQTree():
    """Simulate a host/guest tree pair and infer a tree from the result.

    A random host topology and a guest tree are generated, sequences are
    evolved along them, an 'Outgroup' leaf with its own evolved sequence is
    attached to the guest tree, and the trees and sequences are written to
    'testtree.nwk', 'hosttree.nwk' and 'testfasta.fa'. The tree read back
    from 'testfasta.fa.treefile' is rooted on the outgroup.

    Returns:
        The tuple ``(hostTree, guestTree, iqtree)``.
    """
    startingDomains = 1

    def extractSequences(sequence):
        # HMMER is consulted on every call, exactly like a per-use if/else.
        finder = findDomains if HMMER else findMotifs
        return finder(sequence, hmmfile)[2]

    hostTree = createRandomTopology(1, 1, lambda x: x)
    guestTree, nodeMap = buildGuestTree(hostTree, s2, expfunc, .2, gaussNoise,
                                        startingDomains)

    rootSequence = grs(startingDomains)
    evolveAlongTree(hostTree, guestTree, nodeMap, rootSequence, hmmfile,
                    emissionProbs, transmat)

    names = []
    seqs = []
    for hostLeaf in hostTree:
        seqs += extractSequences(hostLeaf.sequence)
        # Surviving guest leaves, ordered by (position, name).
        survivors = sorted(
            (leaf.position, leaf.name)
            for leaf in findLeaves(nodeMap[hostLeaf])
            if leaf.event != 'LOSS')
        names += [label for _, label in survivors]

    guestTree = prune(guestTree, names)

    # Attach the outgroup by wiring the parent/child links directly.
    outgroup = Tree()
    outgroup.name = 'Outgroup'
    outgroup.up = guestTree
    guestTree.children.append(outgroup)

    evolved = evolveSequence(rootSequence, .1, 2, emissionProbs, hmmfile,
                             transmat)
    outseq = extractSequences(evolved)[0]
    outgroup.add_feature('sequence', outseq)
    seqs.insert(0, outseq)
    names.insert(0, 'Outgroup')

    guestTree.write(outfile='testtree.nwk')
    hostTree.write(outfile='hosttree.nwk')
    addRandomTrees('testtree.nwk')

    writeFasta(names, seqs, 'testfasta.fa', False)
    mlTree('testfasta.fa', 'testtree.nwk', True)
    iqtree = Tree('testfasta.fa.treefile')
    iqtree.set_outgroup(iqtree & ('Outgroup'))

    return hostTree, guestTree, iqtree
Example #7
0
def build_tree(sequences, parents, counts=None, naive='naive'):
    """Assemble an ete tree from per-node sequences and child->parent links.

    Every name in ``sequences`` becomes a node carrying ``nuc_seq``,
    ``aa_seq`` (via ``local_translate``) and ``frequency`` (looked up in
    ``counts``, 0 when absent). Nodes are linked using ``parents``; unless
    ``naive`` is None the tree is then rerooted on the first node whose name
    contains ``naive``. Ambiguous bases are resolved by ``disambiguate`` and
    branch lengths are set to Hamming distances between nucleotide sequences.
    """
    # Step 1: create all nodes, still disconnected from each other.
    node_by_name = {}
    for seq_name in sequences:
        new_node = Tree()
        new_node.name = seq_name
        new_node.add_feature('nuc_seq', sequences[seq_name])
        new_node.add_feature('aa_seq', local_translate(sequences[seq_name]))
        abundance = (counts[seq_name]
                     if counts is not None and seq_name in counts else 0)
        new_node.add_feature('frequency', abundance)
        node_by_name[seq_name] = new_node

    # Step 2: wire children to parents; a node without a parent is the root.
    for seq_name in sequences:
        if seq_name not in parents:
            tree = node_by_name[seq_name]
            continue
        node_by_name[parents[seq_name]].add_child(node_by_name[seq_name])

    # Step 3: reroot on the naive sequence.
    if naive is not None:
        matching = [key for key in node_by_name if naive in key]
        naive_node = node_by_name[matching[0]]
        assert not naive_node.children
        former_parent = naive_node.up
        former_parent.remove_child(naive_node)
        naive_node.add_child(former_parent)
        # Rerooting may leave the former parent with a single child; if so,
        # splice it out and recompute the affected branch length.
        if len(former_parent.children) == 1:
            former_parent.delete(prevent_nondicotomic=False)
            only_child = former_parent.children[0]
            only_child.dist = hamming_distance(only_child.nuc_seq,
                                               naive_node.nuc_seq)
        tree = naive_node

    # Step 4: make random choices for ambiguous bases.
    tree = disambiguate(tree)

    # Step 5: branch lengths are Hamming distances to the parent sequence.
    tree.dist = 0  # no branch above root
    for descendant in tree.iter_descendants():
        descendant.dist = hamming_distance(descendant.nuc_seq,
                                           descendant.up.nuc_seq)

    return tree
Example #8
0
def build_tree(sequences: Dict[str, str],
               parents: Dict[str, str],
               counts=None,
               root="root"):
    """Build an ete tree from sequences and parents dictionaries.

    Args:
        sequences: a dictionary mapping node names to sequences
        parents: a dictionary mapping each child node name to the name of its
            parent node. Names missing from this dictionary are treated as
            parentless.
        counts: a dictionary mapping node names to observed abundances. This argument
            is no longer used in the main gctree inference pipeline (counts are assigned in DAG)
            but remains for compatibility.
        root: text identifying the root node. The tree is rerooted on the
            first node whose name contains it; pass None to skip rerooting.

    Returns:
        The root node of the assembled tree.

    Raises:
        RuntimeError: if ``root`` is given but no node name contains it.
    """
    # build an ete tree
    # first a dictionary of disconnected nodes
    nodes = {}
    for name, sequence in sequences.items():
        node = Tree()
        node.name = name
        node.add_feature("sequence", sequence)
        if counts is not None:
            node.add_feature("abundance", counts.get(name, 0))
        nodes[name] = node
    for name in sequences:
        if name in parents:
            nodes[parents[name]].add_child(nodes[name])
        else:
            tree = nodes[name]
    # reroot on root
    if root is not None:
        root_id = next((name for name in nodes if root in name), None)
        if root_id is None:
            raise RuntimeError(
                f"Provided root id '{root}' not found in dnapars tree.")
        root_node = nodes[root_id]
        assert len(root_node.children) == 0
        root_parent = root_node.up
        # A root node that has no parent (e.g. a single-node tree) is
        # already the top of the tree, so there is nothing to re-hang.
        if root_parent is not None:
            root_parent.remove_child(root_node)
            root_node.add_child(root_parent)
            # remove possible unnecessary unifurcation after rerooting
            if len(root_parent.children) == 1:
                root_parent.delete(prevent_nondicotomic=False)
                only_child = root_parent.children[0]
                only_child.dist = gctree.utils.hamming_distance(
                    only_child.sequence, root_node.sequence)
        tree = root_node

    return tree
Example #9
0
def build_tree(sequences, parents, counts=None, naive='naive'):
    """Assemble an ete tree from per-node sequences and child->parent links.

    Every name in ``sequences`` becomes a node with a ``sequence`` feature
    and, when ``counts`` is given, a ``frequency`` feature (0 for names that
    are missing from ``counts``). Nodes are linked using ``parents``. Unless
    ``naive`` is None, the first node whose name contains ``naive`` must be a
    childless child of the root and becomes the new root. Ambiguous bases are
    resolved by ``disambiguate`` and branch lengths are set to Hamming
    distances.
    """
    # Step 1: create all nodes, still disconnected from each other.
    # Frequencies come from the count file rather than from a "_<n>" suffix
    # on the node name.
    node_by_name = {}
    for seq_name in sequences:
        new_node = Tree()
        new_node.name = seq_name
        new_node.add_feature('sequence', sequences[seq_name])
        if counts is not None:
            abundance = counts[seq_name] if seq_name in counts else 0
            new_node.add_feature('frequency', abundance)
        node_by_name[seq_name] = new_node

    # Step 2: wire children to parents; a node without a parent is the root.
    for seq_name in sequences:
        if seq_name not in parents:
            tree = node_by_name[seq_name]
            continue
        node_by_name[parents[seq_name]].add_child(node_by_name[seq_name])

    # Step 3: reroot on the naive sequence.
    if naive is not None:
        matching = [key for key in node_by_name if naive in key]
        naive_node = node_by_name[matching[0]]
        assert not naive_node.children
        assert naive_node in tree.children
        tree.remove_child(naive_node)
        naive_node.add_child(tree)
        tree = naive_node

    # Step 4: make random choices for ambiguous bases.
    tree = disambiguate(tree)

    # Step 5: branch lengths are Hamming distances to the parent sequence.
    tree.dist = 0  # no branch above root
    for descendant in tree.iter_descendants():
        descendant.dist = gctree.hamming_distance(descendant.sequence,
                                                  descendant.up.sequence)

    return tree
Example #10
0
def parse_union_tree(history_1, history_2, base_tree_path, debug=False):
    """Merge two labelled histories of the same base tree into one tree.

    The base tree is read from ``base_tree_path`` (newick, ``format=1``) and
    walked in pre-order. For every branch (parent, child) of the base tree,
    the nodes that ``history_1`` and ``history_2`` place on that branch but
    that do not exist in the base tree are added to the result in order of
    increasing distance, as nodes named ``internal_<n>``. The base child is
    then added below them. Every node of the result carries the features
    ``history_1_label`` and ``history_2_label``.

    Args:
        history_1: tree whose nodes expose ``label``; node names are looked
            up by the names used in the base tree.
        history_2: second tree, used the same way as ``history_1``.
        base_tree_path: newick source passed to ``Tree(..., format=1)``.
        debug: when true, progress information is printed.

    Returns:
        The root node of the united tree.
    """
    base_tree = Tree(base_tree_path, format=1)
    # add for debugging
    base_tree.get_tree_root().name = "_baseInternal_30"
    united_tree = Tree()
    united_tree.dist = 0  # initialize distance to 0
    united_tree.get_tree_root().name = history_1.get_tree_root(
    ).name  # set the name of the root
    united_tree.add_feature("history_1_label", history_1.get_tree_root().label)
    united_tree.add_feature("history_2_label", history_2.get_tree_root().label)
    # Counter used to give every added intermediate node a unique name.
    union_nodes_number = 0
    for original_node in base_tree.traverse(
            "preorder"
    ):  # traverse the tree in pre-order to assure that for any visited node, its parent from the base branch is already in the united tree
        original_parent = original_node.up
        if original_parent != None:  # will be none only in the case the original node is the root
            if debug:
                print("handled branch: (", original_node.name, ",",
                      original_parent.name, ")")
            # Node of the united tree under which this branch is rebuilt.
            curr_union_parent = united_tree.search_nodes(
                name=original_parent.name)[0]

            # Locate the first node of history 1 on this branch: either an
            # intermediate node (absent from the base tree) that contains the
            # original child, or the original child itself.
            hist_1_done = True
            hist_1_curr_child = None
            hist_1_parent = history_1.search_nodes(name=original_parent.name)[
                0]  # need to check names consistency across the 3 trees
            for child in hist_1_parent.children:
                if len(base_tree.search_nodes(name=child.name)) == 0 and len(
                        child.search_nodes(name=original_node.name)
                ) > 0:  # if the child is a root in a tree that holds the original child node, then this child must be on the branch of interest
                    hist_1_curr_child = child
                    hist_1_done = False
                    break
            if hist_1_done:
                hist_1_curr_child = history_1.search_nodes(
                    name=original_node.name)[0]
            # NOTE(review): this label is captured once and not refreshed
            # inside the while loop below - confirm that is intended.
            hist_1_current_label = hist_1_curr_child.label

            # Same search for history 2.
            hist_2_done = True
            hist_2_curr_child = None
            hist_2_parent = history_2.search_nodes(name=original_parent.name)[
                0]  # need to check names consistency across the 3 trees
            for child in hist_2_parent.children:
                if len(base_tree.search_nodes(name=child.name)) == 0 and len(
                        child.search_nodes(name=original_node.name)
                ) > 0:  # if the child is a root in a tree that holds the original child node, then this child must be on the branch of interest
                    hist_2_curr_child = child
                    hist_2_done = False
                    break
            if hist_2_done:
                hist_2_curr_child = history_2.search_nodes(
                    name=original_node.name)[0]
            hist_2_current_label = hist_2_curr_child.label

            # Interleave the intermediate nodes of both histories, always
            # adding the one with the smaller remaining distance first.
            while not hist_1_done or not hist_2_done:

                # A finished history keeps an infinite distance so the other
                # history is always chosen.
                hist_1_dist = float("inf")
                hist_2_dist = float("inf")
                if not hist_1_done:  # if there is a node closer to the original node in history 1 -> add it to the united tree first
                    hist_1_dist = hist_1_curr_child.get_distance(
                        original_parent.name) - curr_union_parent.get_distance(
                            original_parent.name)
                if not hist_2_done:
                    hist_2_dist = hist_2_curr_child.get_distance(
                        original_parent.name) - curr_union_parent.get_distance(
                            original_parent.name)

                if debug:
                    if not hist_1_done:
                        print("history 1 has current child of ",
                              original_parent.name, ": ",
                              hist_1_curr_child.name, " with label: ",
                              hist_1_current_label,
                              " and distance from parent is: ", hist_1_dist)
                    if not hist_2_done:
                        print("history 2 has current child of ",
                              original_parent.name, ": ",
                              hist_2_curr_child.name, " with label: ",
                              hist_2_current_label,
                              " and distance from parent is: ", hist_2_dist)

                # first, check if now the two current children have the same name, and if this name is in the base tree - exit
                if hist_1_curr_child.name == hist_2_curr_child.name and len(
                        base_tree.search_nodes(
                            name=hist_1_curr_child.name)) > 0:
                    break

                # else, at least one of the histories has more than one step to go before reaching the bottom of the branch
                if hist_1_dist < hist_2_dist:  # add the node from history 1 and travel down to the next node in history 1
                    if debug:
                        print(
                            "adding child from history 1 which precedes to the one from history 2"
                        )
                        print("the label of the added node in history 1 is: ",
                              hist_1_curr_child.label)
                        print(
                            "the label of the added node in histroy 2 remains like papa: ",
                            hist_2_current_label)
                    curr_union_parent = curr_union_parent.add_child(
                        child=None,
                        name="internal_" + str(union_nodes_number),
                        dist=hist_1_dist,
                        support=None)
                    curr_union_parent.add_feature("history_1_label",
                                                  hist_1_curr_child.label)
                    curr_union_parent.add_feature("history_2_label",
                                                  hist_2_current_label)
                    # Step down history 1; a node with exactly one child is
                    # still on the branch, anything else ends the walk.
                    hist_1_parent = hist_1_curr_child
                    if len(hist_1_parent.children) == 1:
                        hist_1_curr_child = hist_1_parent.children[0]
                    else:
                        hist_1_done = True
                    if debug:
                        print("united tree is now: \n", united_tree)
                        if hist_1_done:
                            print(
                                "history 1 on the handled branch is complete")
                        else:
                            print(
                                "history 1 on the handled branch isn't complete yet"
                            )

                else:  # add the node from history 2 and travel down to the next node in history 2
                    if debug:
                        print(
                            "adding child from history 2 which precedes to the one from history 1"
                        )
                        print("the label of the added node in history 2 is: ",
                              hist_2_curr_child.label)
                        print(
                            "the label of the added node in history 1 remains like papa: ",
                            hist_1_current_label)
                    curr_union_parent = curr_union_parent.add_child(
                        child=None,
                        name="internal_" + str(union_nodes_number),
                        dist=hist_2_dist)  # added as a new branch
                    curr_union_parent.add_feature("history_1_label",
                                                  hist_1_current_label)
                    curr_union_parent.add_feature("history_2_label",
                                                  hist_2_curr_child.label)
                    # Step down history 2, mirroring the history 1 case.
                    hist_2_parent = hist_2_curr_child
                    if len(hist_2_parent.children) == 1:
                        hist_2_curr_child = hist_2_parent.children[0]
                    else:
                        hist_2_done = True
                    if debug:
                        print("united tree is now: \n", united_tree)
                        if hist_2_done:
                            print(
                                "history 2 on the handled branch is complete")
                        else:
                            print(
                                "history 2 on the handled branch isn't complete yet"
                            )
                union_nodes_number += 1

            # now add the original node as the child of the current parent
            original_dist = original_node.dist
            # Length left on the base branch after the intermediate nodes
            # that were already placed on it.
            residual = original_dist - curr_union_parent.get_distance(
                united_tree.search_nodes(name=original_parent.name)[0])
            curr_union_parent = curr_union_parent.add_child(
                child=None, name=original_node.name, dist=residual)
            curr_union_parent.add_feature(
                "history_1_label",
                history_1.search_nodes(name=original_node.name)[0].label)
            curr_union_parent.add_feature(
                "history_2_label",
                history_2.search_nodes(name=original_node.name)[0].label)

    return united_tree
Example #11
0
'''
 Node Style End
#######################'''
 
 
for r in xrange(rows):
    cell_id = trackResult[r, 0]
    time_begin = trackResult[r, 1]
    time_end = trackResult[r, 2]
    parent_id = trackResult[r, 3]
    time_duration = np.abs(time_begin-time_end)
    # for root
    if parent_id == 0:
        # Add name to root for the first iteration
        root.add_feature("name", str(cell_id))
        # change the branch length
        root.add_feature("dist", time_duration)
        #change node style
        root.set_style(ns_root)
        
        # set node name to face
        nameFace = TextFace(root.name)
        nameFace.fgcolor = "white"
        nameFace.fsize = 15
#        nameFace.border.width = 1
        nameFace.background.color = "green"
        node_cur.add_face(nameFace, column=1, position="branch-bottom")
    
    else:  # for child
        #### search the parent node by parent_id
Example #12
0
R = LG_matrix['R']
Q = LG_matrix['Q']
PI = LG_matrix['PI']
amino_acids = LG_matrix['amino_acids']
aa2idx = {}
for i in range(len(amino_acids)):
    aa2idx[amino_acids[i]] = i

## sample sequence for the root node from the equilibrium
## distribution of amino acids
len_protein = 100
root_seq = nrand.choice(amino_acids,
                        size=len_protein,
                        replace=True,
                        p=PI.reshape(-1) / np.sum(PI))
t.add_feature('seq', root_seq)

## simulate sequences for each node
## the evolution process is modelled as a continuous-time Markov chain.
## the following script is used for simulating the continuous-time Markov chain.
for node in t.traverse('preorder'):
    if node.is_root():
        continue
    anc_node = node.up

    seq = np.copy(anc_node.seq)
    dist = node.dist

    while True:
        tot_rate = -np.sum([Q_dict[(aa, aa)] for aa in seq])
        wait_time = nrand.exponential(scale=1 / tot_rate)
Example #13
0
#ns["hz_line_width"] = 1.5
#ns["vt_line_width"] = 1.5
'''
 Node Style End
#######################'''

for r in xrange(rows):
    cell_id = trackResult[r, 0]
    time_begin = trackResult[r, 1]
    time_end = trackResult[r, 2]
    parent_id = trackResult[r, 3]
    time_duration = np.abs(time_begin - time_end)
    # for root
    if parent_id == 0:
        # Add name to root for the first iteration
        root.add_feature("name", str(cell_id))
        # change the branch length
        root.add_feature("dist", time_duration)
        #change node style
        root.set_style(ns_root)

        # set node name to face
        nameFace = TextFace(root.name)
        nameFace.fgcolor = "white"
        nameFace.fsize = 15
        #        nameFace.border.width = 1
        nameFace.background.color = "green"
        node_cur.add_face(nameFace, column=1, position="branch-bottom")

    else:  # for child
        #### search the parent node by parent_id
Example #14
0
class um_tree:
    """Tree wrapper that derives waiting-time intervals and species partitions.

    The input tree is parsed with ``Tree(tree, format=1)``, polytomies are
    resolved, and every node is annotated with ``age`` (its distance from the
    root). ``self.nodes`` holds the internal nodes plus the leaf farthest
    from the root, sorted by age.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, tree):
        """Parse ``tree`` and precompute node ages and ids.

        Args:
            tree: newick source accepted by ``Tree(..., format=1)``.
        """
        self.tree = Tree(tree, format=1)
        self.tree.resolve_polytomy(default_dist=0.000001, recursive=True)
        self.tree.dist = 0
        self.tree.add_feature("age", 0)
        self.nodes = self.tree.get_descendants()
        internal_node = []
        cnt = 0
        for n in self.nodes:
            node_age = n.get_distance(self.tree)
            n.add_feature("age", node_age)
            if not n.is_leaf():
                # Only internal (non-root) nodes receive a running id.
                n.add_feature("id", cnt)
                cnt = cnt + 1
                internal_node.append(n)
        # Internal nodes plus the farthest leaf, ordered by age.
        self.nodes = internal_node
        one_leaf = self.tree.get_farthest_node()[0]
        one_leaf.add_feature("id", cnt + 1)
        if one_leaf.is_leaf():
            self.nodes.append(one_leaf)
        self.nodes.sort(key=self.__compare_node)
        # List of species, each a list of leaf nodes; filled by
        # get_waiting_times().
        self.species_list = []
        # Roots of the coalescent subtrees; filled by get_waiting_times().
        self.coa_roots = None

    def __compare_node(self, node):
        """Sort key: the age of ``node``."""
        return node.age

    @staticmethod
    def _line_style(fgcolor, vt_color, hz_color):
        """Return a NodeStyle with solid 2px lines, size 0 and given colors."""
        style = NodeStyle()
        style["fgcolor"] = fgcolor
        style["vt_line_color"] = vt_color
        style["hz_line_color"] = hz_color
        style["vt_line_width"] = 2
        style["hz_line_width"] = 2
        style["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
        style["hz_line_type"] = 0
        style["size"] = 0
        return style

    def get_waiting_times(self, threshold_node=None, threshold_node_idx=0):
        """Compute the waiting-time intervals for a given threshold node.

        Nodes are visited in order of age. Before the threshold node is
        reached each node opens a new line; from the threshold node onwards
        nodes either start a new coalescent subtree or join the subtree of
        an ancestor that already is a coalescent root.

        As a side effect ``self.species_list`` and ``self.coa_roots`` are
        rebuilt: one species per coalescent root (its leaves) plus one
        singleton species for every remaining leaf.

        Args:
            threshold_node: node at which the switch happens; when None,
                ``self.nodes[threshold_node_idx]`` is used.
            threshold_node_idx: index into ``self.nodes`` used when
                ``threshold_node`` is None.

        Returns:
            ``(wt_list, num_spe)``: the list of ``waiting_time`` objects for
            intervals longer than 1e-8, and the number of lines at the
            threshold node (-1 if the threshold was never reached).
        """
        wt_list = []
        reach_t = False
        curr_age = 0.0
        curr_spe = 2
        curr_num_coa = 0
        coa_roots = []
        num_spe = -1

        if threshold_node is None:
            threshold_node = self.nodes[threshold_node_idx]

        last_coa_num = 0
        tcnt = 0
        for node in self.nodes:
            wt = None
            times = node.age - curr_age
            if times >= 0:
                curr_age = node.age
                assert curr_spe >= 0

                if reach_t:
                    if tcnt == 0:
                        last_coa_num = 2

                    # Walk up from the node looking for an ancestor that is
                    # already a coalescent root.
                    fnode = node.up
                    coa_root = None
                    while not fnode.is_root():
                        for coa_r in coa_roots:
                            if coa_r.id == fnode.id:
                                coa_root = coa_r
                                break
                        if coa_root is not None:
                            break
                        fnode = fnode.up

                    wt = waiting_time(length=times,
                                      num_coas=curr_num_coa,
                                      num_lines=curr_spe)

                    for coa_r in coa_roots:
                        coa = coalescent(num_individual=coa_r.curr_n)
                        wt.coas.add_coalescent(coa)

                    wt.coas.coas_idx = last_coa_num
                    wt.num_curr_coa = last_coa_num
                    if coa_root is None:  # here can be modified to use multiple T
                        # The node starts a new coalescent subtree.
                        curr_spe = curr_spe - 1
                        curr_num_coa = curr_num_coa + 1
                        node.add_feature("curr_n", 2)
                        coa_roots.append(node)
                        last_coa_num = 2
                    else:
                        # The node joins an existing coalescent subtree.
                        curr_n = coa_root.curr_n
                        coa_root.add_feature("curr_n", curr_n + 1)
                        last_coa_num = curr_n + 1
                    tcnt = tcnt + 1

                else:
                    if node.id == threshold_node.id:
                        reach_t = True
                        tcnt = 0
                        wt = waiting_time(length=times,
                                          num_coas=0,
                                          num_lines=curr_spe)
                        num_spe = curr_spe
                        curr_spe = curr_spe - 1
                        curr_num_coa = 2
                        node.add_feature("curr_n", 2)
                        coa_roots.append(node)
                    else:
                        wt = waiting_time(length=times,
                                          num_coas=0,
                                          num_lines=curr_spe)
                        curr_spe = curr_spe + 1
            # Intervals of (numerically) zero length are not reported.
            if times > 0.00000001:
                wt_list.append(wt)

        for wt in wt_list:
            wt.count_num_lines()

        self.species_list = []
        all_coa_leaves = []
        self.coa_roots = coa_roots
        for coa_r in coa_roots:
            leaves = coa_r.get_leaves()
            all_coa_leaves.extend(leaves)
            self.species_list.append(leaves)

        # Leaves outside every coalescent subtree form singleton species.
        all_leaves = self.tree.get_leaves()
        for leaf in all_leaves:
            if leaf not in all_coa_leaves:
                self.species_list.append([leaf])

        return wt_list, num_spe

    def show(self, wt_list):
        """Print every waiting interval of ``wt_list`` with a running number."""
        cnt = 1
        for wt in wt_list:
            print("Waitting interval " + repr(cnt))
            print(wt)
            cnt = cnt + 1

    def get_species(self):
        """Return taxon names grouped by species and restyle the tree.

        All nodes get a blue line style; coalescent roots and their
        descendants are restyled in red.

        Returns:
            ``([all_taxa_name], sp_list)`` where ``all_taxa_name`` lists the
            names of all leaves and ``sp_list`` holds one list of taxon names
            per species.
        """
        sp_list = [[taxa.name for taxa in sp] for sp in self.species_list]
        all_taxa_name = [leaf.name for leaf in self.tree.get_leaves()]

        style0 = self._line_style("#000000", "#0000aa", "#0000aa")
        for node in self.tree.get_descendants():
            node.set_style(style0)
            node.img_style["size"] = 0
        self.tree.set_style(style0)
        self.tree.img_style["size"] = 0

        style1 = self._line_style("#000000", "#ff0000", "#0000aa")
        style2 = self._line_style("#0f0f0f", "#ff0000", "#ff0000")
        # coa_roots stays None until get_waiting_times() has run; treat that
        # as "no coalescent subtrees" instead of failing.
        for node in self.coa_roots or []:
            node.set_style(style1)
            node.img_style["size"] = 0
            for des in node.get_descendants():
                des.set_style(style2)
                des.img_style["size"] = 0
        return [all_taxa_name], sp_list

    def print_species(self, save_file):
        """Write the species partition to ``<save_file>/partition.txt``.

        Args:
            save_file: directory in which ``partition.txt`` is created.
        """
        cnt = 1
        # The context manager guarantees the file is flushed and closed.
        with open(os.path.join(save_file, "partition.txt"), "w+") as file3:
            for sp in self.species_list:
                print("Species " + repr(cnt) + ":", file=file3)
                cnt = cnt + 1
                taxas = ""
                for taxa in sp:
                    taxas = taxas + taxa.name + ", "
                print("" + taxas[:-1], file=file3)

    def print_species_spart(self, save_file):
        """Write the species partition to ``<save_file>/partition.spart``.

        Each species and each taxon is also echoed to stdout via ``repr``.

        Args:
            save_file: directory in which ``partition.spart`` is created.
        """
        with open(os.path.join(save_file, "partition.spart"), "w+") as file3:
            file3.write("Filename=GMYC delimitation\n")
            file3.write(f'{datetime.datetime.now().astimezone().isoformat()}\n\n')
            file3.write(f"Npartition={1};GMYC\n")

            file3.write(f'Nsamples={sum(len(sp) for sp in self.species_list)}\n')
            file3.write(
                f'Nsubsets={len(self.species_list)};{",".join(["?" for i in range(len(self.species_list))])}\n\n'
            )
            file3.write("#this is my first comment\n")
            file3.write("#this is my second comment\n\n")
            file3.write("Assignment\n")
            cnt = 1
            for sp in self.species_list:
                print(repr(sp))
                for taxa in sp:
                    print(repr(taxa))
                    xx = taxa.name + "\t" + repr(cnt) + ";" + "?"
                    file3.write(f"{xx}\n")
                cnt += 1

            file3.write("\nPartition_score=\n")

    def output_species(self, taxa_order=None):
        """Return the species index of every taxon, in ``taxa_order``.

        Args:
            taxa_order: taxon names in the desired output order. When None
                or empty, the leaf names of the tree are used.

        Returns:
            ``(taxa_order, partition)`` where ``partition[i]`` is the 1-based
            species number of ``taxa_order[i]``, or ``(None, None)`` when the
            number of names does not match the number of taxa in the species
            list.
        """
        if taxa_order is None or len(taxa_order) == 0:
            taxa_order = self.tree.get_leaf_names()
        num_taxa = sum(len(sp) for sp in self.species_list)
        if not len(taxa_order) == num_taxa:
            print("error error, taxa_order != num_taxa!")
            return None, None

        partion = [-1] * num_taxa
        cnt = 1
        for sp in self.species_list:
            for taxa in sp:
                idx = taxa_order.index(taxa.name)
                partion[idx] = cnt
            cnt = cnt + 1
        return taxa_order, partion

    def num_lineages(self, wt_list, save_file):
        """Plot the number of lineages over time to ``Time_Lines.png``.

        Args:
            wt_list: waiting intervals exposing ``get_num_branches()`` and
                ``length``.
            save_file: directory in which ``Time_Lines.png`` is saved.
        """
        nl_list = []
        times = []
        last_time = 0.0
        for wt in wt_list:
            nl_list.append(wt.get_num_branches())
            times.append(last_time)
            last_time = wt.length + last_time
        plt.plot(times, nl_list)
        plt.ylabel('Number of lineages')
        plt.xlabel('Time')
        plt.savefig(os.path.join(save_file, "Time_Lines.png"))
Example #15
0
def parse_union_tree(history_1, history_2, base_tree_path, debug=False):
    """Build the union of two histories that were mapped onto one base tree.

    The base tree is read from ``base_tree_path`` and walked in pre-order.
    For every branch (parent -> node) of the base tree, the single-child
    nodes that either history holds along that branch (nodes whose names are
    absent from the base tree) are added to the united tree, the one closest
    to the parent first, followed by the base-tree node itself. Every node of
    the result carries the features ``history_1_label`` and
    ``history_2_label``.

    Nodes are matched across the three trees by name, so names must be
    consistent between them.
    NOTE(review): the base root is renamed to "root" while the united root
    takes the name of the history 1 root, so the history roots are presumably
    also named "root" - confirm against the callers.

    Args:
        history_1: tree whose nodes expose a ``label`` attribute.
        history_2: second tree of the same kind.
        base_tree_path: newick source handed to ``Tree(..., format=1)``.
        debug: when true, print a trace of the merge. The flag only controls
            printing; the returned tree is the same either way.

    Returns:
        The united tree.

    Raises:
        SystemExit: through ``exit(1)`` when a base-tree node is missing from
            history 2, when both histories sit on the original child while
            the branch is still marked unfinished, or when the residual
            branch length is negative.
        IndexError: when one of the other by-name lookups finds no node.
    """
    base_tree = Tree(base_tree_path, format=1)
    base_tree.get_tree_root().name = "root"
    united_tree = Tree()
    united_tree.dist = 0  # initialize distance to 0
    # set the name of the root
    united_tree.get_tree_root().name = history_1.get_tree_root().name
    united_tree.add_feature("history_1_label", history_1.get_tree_root().label)
    united_tree.add_feature("history_2_label", history_2.get_tree_root().label)
    union_nodes_number = 0
    # Pre-order guarantees that, for any visited node, its parent from the
    # base branch is already in the united tree.
    for original_node in base_tree.traverse("preorder"):
        original_parent = original_node.up
        if original_parent is None:  # only the root has no parent
            continue
        if debug:
            print("handled branch: (", original_node.name, ",",
                  original_parent.name, ")")
        parent_name = original_parent.name.rstrip()
        curr_union_parent = united_tree.search_nodes(name=parent_name)[0]

        # ---- first node of history 1 on this branch ----
        hist_1_done = True
        hist_1_curr_child = None
        # need to check names consistency across the 3 trees
        hist_1_parent = history_1.search_nodes(name=parent_name)[0]
        for child in hist_1_parent.children:
            # A child that does not exist in the base tree, has exactly one
            # child and holds the original node below it is a mapping node
            # created by breaking this branch of the original tree.
            if (len(base_tree.search_nodes(name=child.name)) == 0
                    and len(child.get_children()) == 1
                    and len(child.search_nodes(name=original_node.name)) > 0):
                hist_1_curr_child = child
                hist_1_done = False
                break
        if hist_1_done:
            hist_1_curr_child = history_1.search_nodes(
                name=original_node.name.rstrip())[0]
        # NOTE: read once here and not refreshed inside the loop below
        hist_1_current_label = hist_1_curr_child.label

        # ---- first node of history 2 on this branch ----
        hist_2_done = True
        hist_2_curr_child = None
        # need to check names consistency across the 3 trees
        hist_2_parent = history_2.search_nodes(name=parent_name)[0]
        for child in hist_2_parent.children:
            # same mapping-node test as for history 1
            if (len(base_tree.search_nodes(name=child.name)) == 0
                    and len(child.get_children()) == 1
                    and len(child.search_nodes(name=original_node.name)) > 0):
                hist_2_curr_child = child
                hist_2_done = False
                break
        if hist_2_done:
            try:
                hist_2_curr_child = history_2.search_nodes(
                    name=original_node.name.rstrip())[0]
            except IndexError:
                # the lookup returned no node: report it before aborting
                print("error! node ", original_node.name.rstrip(),
                      " of the base tree was not found in history 2")
                exit(1)
        # NOTE: read once here and not refreshed inside the loop below
        hist_2_current_label = hist_2_curr_child.label

        original_dist = original_node.dist

        while not hist_1_done or not hist_2_done:

            # both have reached the original child
            if (hist_1_curr_child.name == hist_2_curr_child.name
                    and hist_1_curr_child.name == original_node.name):
                print(
                    "error! original child wasn't recognized in the end of the loop"
                )
                exit(1)

            hist_1_dist = history_1.search_nodes(
                name=original_node.name.rstrip())[0].dist
            hist_2_dist = history_2.search_nodes(
                name=original_node.name.rstrip())[0].dist
            # A history that still has a pending node on the branch competes
            # with that node's distance below the last node already placed
            # in the united tree.
            if not hist_1_done:
                hist_1_dist = (
                    hist_1_curr_child.get_distance(original_parent.name)
                    - curr_union_parent.get_distance(original_parent.name))
            if not hist_2_done:
                hist_2_dist = (
                    hist_2_curr_child.get_distance(original_parent.name)
                    - curr_union_parent.get_distance(original_parent.name))

            if debug:
                if not hist_1_done:
                    print("history 1 has current child of ",
                          original_parent.name, ": ",
                          hist_1_curr_child.name, " with label: ",
                          hist_1_current_label,
                          " and distance from parent is: ", hist_1_dist)
                if not hist_2_done:
                    print("history 2 has current child of ",
                          original_parent.name, ": ",
                          hist_2_curr_child.name, " with label: ",
                          hist_2_current_label,
                          " and distance from parent is: ", hist_2_dist)

            # first, check if now the two current children have the same
            # name, and if this name is in the base tree - exit
            if (hist_1_curr_child.name == hist_2_curr_child.name
                    and len(base_tree.search_nodes(
                        name=hist_1_curr_child.name)) > 0):
                break

            # else, at least one of the histories has more than one step to
            # go before reaching the bottom of the branch
            if hist_1_dist < hist_2_dist:
                # add the node from history 1 and travel down to the next
                # node in history 1
                if debug:
                    print(
                        "adding child from history 1 which precedes to the one from history 2"
                    )
                    print("the label of the added node in history 1 is: ",
                          hist_1_curr_child.label)
                    print(
                        "the label of the added node in history 2 remains like papa: ",
                        hist_2_current_label)
                curr_union_parent = curr_union_parent.add_child(
                    child=None,
                    name="internal_" + str(union_nodes_number),
                    dist=hist_1_dist,
                    support=None)
                curr_union_parent.add_feature("history_1_label",
                                              hist_1_curr_child.label)
                curr_union_parent.add_feature("history_2_label",
                                              hist_2_current_label)
                hist_1_parent = hist_1_curr_child
                if len(hist_1_parent.children) == 1:
                    hist_1_curr_child = hist_1_parent.children[0]
                    if hist_1_curr_child.name == original_node.name:
                        hist_1_done = True
                else:
                    # two children only occur when reaching a junction from
                    # the base tree
                    hist_1_done = True
                if debug:
                    if hist_1_done:
                        print("history 1 on the handled branch is complete")
                    else:
                        print(
                            "history 1 on the handled branch isn't complete yet"
                        )
            else:
                # add the node from history 2 and travel down to the next
                # node in history 2
                if debug:
                    print(
                        "adding child from history 2 which precedes to the one from history 1"
                    )
                    print("the label of the added node in history 2 is: ",
                          hist_2_curr_child.label)
                    print(
                        "the label of the added node in history 1 remains like papa: ",
                        hist_1_current_label)
                curr_union_parent = curr_union_parent.add_child(
                    child=None,
                    name="internal_" + str(union_nodes_number),
                    dist=hist_2_dist)  # added as a new branch
                curr_union_parent.add_feature("history_1_label",
                                              hist_1_current_label)
                curr_union_parent.add_feature("history_2_label",
                                              hist_2_curr_child.label)
                hist_2_parent = hist_2_curr_child
                if len(hist_2_parent.children) == 1:
                    hist_2_curr_child = hist_2_parent.children[0]
                    if hist_2_curr_child.name == original_node.name:
                        hist_2_done = True
                else:
                    hist_2_done = True
                if debug:
                    if hist_2_done:
                        print("history 2 on the handled branch is complete")
                    else:
                        print(
                            "history 2 on the handled branch isn't complete yet"
                        )
            # Runs after every added node, with or without debug output, so
            # the "internal_N" names never repeat.
            union_nodes_number += 1

        # now add the original node as the child of the current parent; it
        # receives whatever length of the base branch is not yet covered
        union_branch_top = united_tree.search_nodes(name=parent_name)[0]
        covered = curr_union_parent.get_distance(union_branch_top)
        residual = original_dist - covered
        if residual < 0:
            print("error on residual computation for branch leading to ",
                  original_node.name)
            print("residual: ", residual)
            print("original_dist: ", original_dist)
            print(
                "curr_union_parent.get_distance(united_tree.search_nodes(name=original_parent.name.rstrip())[0]): ",
                covered)
            exit(1)
        curr_union_parent = curr_union_parent.add_child(
            child=None, name=original_node.name, dist=residual)
        # the labels stored here are those of the parent's node in each
        # history
        curr_union_parent.add_feature(
            "history_1_label",
            history_1.search_nodes(name=parent_name)[0].label)
        curr_union_parent.add_feature(
            "history_2_label",
            history_2.search_nodes(name=parent_name)[0].label)

    if debug:
        for node in united_tree.traverse("postorder"):
            print("node=", node.name)
            print("label in hist1=", node.history_1_label)
            print("label in hist2=", node.history_2_label)
            print("branch length=", node.dist)
    return united_tree
Example #16
0
class um_tree:
    def __init__(self, tree, PATH):
        """Load a tree and prepare its nodes, ordered by age.

        Args:
            tree: path of the tree file; it is parsed with
                ``Tree(tree, format=1)`` and also opened as ``self.tree2``.
            PATH: stored unchanged as ``self.PATH``.

        After construction ``self.nodes`` holds every non-leaf descendant of
        the root (each tagged with an ``id`` feature) plus the node farthest
        from the root when that node is a leaf, sorted by the ``age``
        feature, which is the node's distance to the root.
        """
        self.tree = Tree(tree, format=1)
        # NOTE(review): this handle is kept on the instance and is not closed
        # anywhere in the visible code.
        self.tree2 = open(tree)
        self.tree.resolve_polytomy(default_dist=0.000001, recursive=True)
        self.tree.dist = 0
        self.tree.add_feature("age", 0)
        self.nodes = self.tree.get_descendants()
        self.PATH = PATH

        internals = []
        for descendant in self.nodes:
            descendant.add_feature("age", descendant.get_distance(self.tree))
            if descendant.is_leaf():
                continue
            # ids are handed out in visiting order: 0, 1, 2, ...
            descendant.add_feature("id", len(internals))
            internals.append(descendant)
        assigned = len(internals)
        self.nodes = internals

        farthest = self.tree.get_farthest_node()[0]
        farthest.add_feature("id", assigned + 1)
        if farthest.is_leaf():
            self.nodes.append(farthest)
        self.nodes.sort(key=self.__compare_node)

        self.species_list = []
        self.coa_roots = None

    def __compare_node(self, node):
        return node.age

    def get_waiting_times(self, threshold_node=None, threshold_node_idx=0):
        wt_list = []
        reach_t = False
        curr_age = 0.0
        curr_spe = 2
        curr_num_coa = 0
        coa_roots = []
        min_brl = 1000
        num_spe = -1

        if threshold_node == None:
            threshold_node = self.nodes[threshold_node_idx]

        last_coa_num = 0
        tcnt = 0
        for node in self.nodes:
            num_children = len(node.get_children())
            wt = None
            times = node.age - curr_age
            if times >= 0:
                if times < min_brl and times > 0:
                    min_brl = times
                curr_age = node.age
                assert curr_spe >= 0

                if reach_t:
                    if tcnt == 0:
                        last_coa_num = 2
                    fnode = node.up
                    coa_root = None

                    idx = 0
                    while not fnode.is_root():
                        idx = 0
                        for coa_r in coa_roots:
                            if coa_r.id == fnode.id:
                                coa_root = coa_r
                                break
                            idx = idx + 1

                        if coa_root != None:
                            break
                        else:
                            fnode = fnode.up

                    wt = waiting_time(length=times,
                                      num_coas=curr_num_coa,
                                      num_lines=curr_spe)

                    for coa_r in coa_roots:
                        coa = coalescent(num_individual=coa_r.curr_n)
                        wt.coas.add_coalescent(coa)

                    wt.coas.coas_idx = last_coa_num
                    wt.num_curr_coa = last_coa_num
                    if (coa_root == None
                        ):  # here can be modified to use multiple T
                        curr_spe = curr_spe - 1
                        curr_num_coa = curr_num_coa + 1
                        node.add_feature("curr_n", 2)
                        coa_roots.append(node)
                        last_coa_num = 2
                    else:
                        curr_n = coa_root.curr_n
                        coa_root.add_feature("curr_n", curr_n + 1)
                        last_coa_num = curr_n + 1
                    tcnt = tcnt + 1
                else:
                    if node.id == threshold_node.id:
                        reach_t = True
                        tcnt = 0
                        wt = waiting_time(length=times,
                                          num_coas=0,
                                          num_lines=curr_spe)
                        num_spe = curr_spe
                        curr_spe = curr_spe - 1
                        curr_num_coa = 2
                        node.add_feature("curr_n", 2)
                        coa_roots.append(node)
                    else:
                        wt = waiting_time(length=times,
                                          num_coas=0,
                                          num_lines=curr_spe)
                        curr_spe = curr_spe + 1
                if times > 0.00000001:
                    wt_list.append(wt)

        for wt in wt_list:
            wt.count_num_lines()

        self.species_list = []
        all_coa_leaves = []
        self.coa_roots = coa_roots
        for coa_r in coa_roots:
            leaves = coa_r.get_leaves()
            all_coa_leaves.extend(leaves)
            self.species_list.append(leaves)

        all_leaves = self.tree.get_leaves()
        for leaf in all_leaves:
            if leaf not in all_coa_leaves:
                self.species_list.append([leaf])

        return wt_list, num_spe

    def show(self, wt_list):
        cnt = 1
        for wt in wt_list:
            print(("Waitting interval " + repr(cnt)))
            print(wt)
            cnt = cnt + 1

    def get_species(self):
        """Return taxon names per species and restyle the tree for drawing.

        Returns:
            ``([all_taxa_name], sp_list)``: a one-element list holding the
            names of all leaves of ``self.tree``, and one list of taxon names
            for every entry of ``self.species_list``.

        Side effects:
            Every node of the tree gets the base style; every node in
            ``self.coa_roots`` and every descendant of such a node gets its
            own highlight style afterwards.
        """
        sp_list = [[member.name for member in group]
                   for group in self.species_list]

        # self.tree.convert_to_ultrametric(tree_length = 1.0, strategy='balanced')

        all_taxa_name = [leaf.name for leaf in self.tree.get_leaves()]

        def build_style(fgcolor, vt_color, hz_color):
            # one NodeStyle with the shared line settings and the given colors
            style = NodeStyle()
            style["fgcolor"] = fgcolor
            style["vt_line_color"] = vt_color
            style["hz_line_color"] = hz_color
            style["vt_line_width"] = 2
            style["hz_line_width"] = 2
            style["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
            style["hz_line_type"] = 0
            style["size"] = 0
            return style

        def paint(target, style):
            # attach the style and force the node size to zero
            target.set_style(style)
            target.img_style["size"] = 0

        # base look for the whole tree, descendants first and the root last
        base_style = build_style("#000000", "#0000aa", "#0000aa")
        for descendant in self.tree.get_descendants():
            paint(descendant, base_style)
        paint(self.tree, base_style)

        # coalescent roots and everything below them are highlighted
        root_style = build_style("#000000", "#ff0000", "#0000aa")
        inner_style = build_style("#0f0f0f", "#ff0000", "#ff0000")
        for coa_root in self.coa_roots:
            paint(coa_root, root_style)
            for below in coa_root.get_descendants():
                paint(below, inner_style)

        return [all_taxa_name], sp_list

    def print_species(self):
        # tree_path = os.path.dirname(self.tree2.name)
        sp_out = open(os.path.join(self.PATH, "GMYC/GMYC_MOTU.txt"), "w+")
        cnt = 1
        for sp in self.species_list:
            # 			print("Species " + repr(cnt) + ":")
            sp_out.write("Species " + repr(cnt) + "\n")
            cnt = cnt + 1
            taxas = ""
            for taxa in sp:
                taxas = taxas + taxa.name + ", "
            # 			print("	" + taxas[:-1])
            sp_out.write("	" + taxas[:-1] + "\n")

    def output_species(self, taxa_order=[]):
        """taxa_order is a list of taxa names, the paritions will be output as the same order"""
        if len(taxa_order) == 0:
            taxa_order = self.tree.get_leaf_names()

        num_taxa = 0
        for sp in self.species_list:
            for taxa in sp:
                num_taxa = num_taxa + 1
        if not len(taxa_order) == num_taxa:
            print("error error, taxa_order != num_taxa!")
            return None, None
        else:
            partion = [-1] * num_taxa
            cnt = 1
            for sp in self.species_list:
                for taxa in sp:
                    idx = taxa_order.index(taxa.name)
                    partion[idx] = cnt
                cnt = cnt + 1
            return taxa_order, partion

    def num_lineages(self, wt_list):
        """Plot the lineage count of every waiting interval against elapsed time.

        Each entry of ``wt_list`` contributes one point: x is the summed
        ``length`` of all entries before it, y is its ``get_num_branches()``.
        The figure is drawn through the module-level ``plt`` object, saved
        under the name ``Time_Lines`` and then shown.
        """
        elapsed = 0.0
        x_values, y_values = [], []
        for interval in wt_list:
            y_values.append(interval.get_num_branches())
            x_values.append(elapsed)
            elapsed = interval.length + elapsed

        plt.plot(x_values, y_values)
        plt.ylabel("Number of lineages")
        plt.xlabel("Time")
        plt.savefig("Time_Lines")
        plt.show()