Example #1
0
def test():
    """Match one TreePattern against several sample trees and print the outcome.

    The pattern is compiled once with ``TreePattern(..., format=8)``. Each
    sample newick string is then loaded with ``Tree(..., format=1)``, drawn as
    ASCII art showing the ``name`` and ``dist`` attributes, and passed to
    ``pattern.find_match`` together with the custom ``length`` function.

    Output is produced with single-argument ``print(...)`` calls so the text is
    the same whether ``print`` is a statement (Python 2) or a function
    (Python 3).
    """
    custom_functions = {"length": length}

    pattern = """
    (
    len{@.children} > 2
    ,
    len{set{{@.name|}}.intersection{set{{"hello"|"bye"}}}} > 0
    ){length{@.name} < 3 or @.name == "pasa"} and @.dist >= 0.5
    ;
    """

    pattern = TreePattern(pattern, format=8)

    print(pattern)

    sample_newicks = (
        "((hello,(1,2,3)kk)pasa:1, NODE);",
        "((hello,(1,2,3)kk)pasa:0.4, NODE);",
        "(hello,(1,2,3)kk)pasa:1;",
        "((bye,(1,2,3)kk)none:1, NODE);",
        "((bye,(1,2,3)kk)y:1, NODE);",
    )
    for newick in sample_newicks:
        tree = Tree(newick, format=1)
        print(tree.get_ascii(attributes=["name", "dist"]))
        # The one-element tuple keeps "%s" safe whatever find_match returns.
        print("Pattern matches tree?: %s"
              % (pattern.find_match(tree, custom_functions),))
Example #2
0
 def ete_print(self):
     """Pretty-print this tree through ETE when it is enabled.

     With ``Cfg.USE_ETE3`` set, the tree is rebuilt from ``self.ete_str()``
     and its ASCII rendering (internal nodes included) is printed; nothing
     is returned. Otherwise nothing is printed and ``str(self)`` is
     returned to the caller instead.

     TODO Debug and document better for case USE_ETE3 == False
     """
     if not Cfg.USE_ETE3:
         return str(self)

     ete_tree = EteTree(self.ete_str(), format=1)
     ascii_art = ete_tree.get_ascii(show_internal=True)
     print(ascii_art)
Example #3
0
class Syntax_tree:
    """Wrap a bracketed parse tree as a ``Tree`` built from a newick string.

    ``self.tree`` is the tree parsed (``format=1``) from the newick text
    produced by :meth:`to_newick_format`, or ``None`` when the parse string
    could not be converted or the newick text could not be loaded.
    """

    def __init__(self, parse_tree):
        """Build ``self.tree`` from a bracketed parse string.

        Args:
            parse_tree: bracketed parse text, e.g. ``"( (S (NP ...)) )"``.
        """
        newick_text = self.to_newick_format(parse_tree)

        self.tree = None
        if newick_text is not None:
            try:
                self.tree = Tree(newick_text, format=1)
            except Exception:
                # Best effort: an unloadable newick string yields tree=None.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                self.tree = None

    def _leaf_nodes_for(self, token_indices):
        """Return the leaf (or ``None``) for every index in ``token_indices``."""
        return [self.get_leaf_node_by_token_index(token_index)
                for token_index in token_indices]

    @staticmethod
    def _first_leaf_below(node):
        """Follow first children from ``node`` until a leaf is reached."""
        while not node.is_leaf():
            node = node.get_children()[0]
        return node

    def print_tree(self):
        """Print the tree as ASCII art, internal node names included."""
        print(self.tree.get_ascii(show_internal=True))

    def get_node_path_to_root(self, node):
        """Return the node names from ``node`` up to the root joined by ``-->``."""
        names = []
        while not node.is_root():
            names.append(node.name)
            node = node.up
        names.append(node.name)
        return "-->".join(names)

    def get_leaf_node_by_token_index(self, token_index):
        """Return the ``token_index``-th leaf, or ``None`` if the index is too large."""
        leaves = self.tree.get_leaves()
        if token_index >= len(leaves):
            return None
        return leaves[token_index]

    # Get the common ancestor from the token indices.
    def get_self_category_node_by_token_indices(self, token_indices):
        """Return the node covering the given tokens.

        For a single token this is the parent of its leaf; for several tokens
        it is the common ancestor of their leaves.
        """
        if len(token_indices) == 1:
            return self.get_leaf_node_by_token_index(token_indices[0]).up

        return self.tree.get_common_ancestor(self._leaf_nodes_for(token_indices))

    def get_common_ancestor_by_token_indices(self, token_indices):
        """Return the common ancestor of the leaves at ``token_indices``."""
        return self.tree.get_common_ancestor(self._leaf_nodes_for(token_indices))

    def get_left_sibling_category_node_by_token_indices(self, token_indices):
        """Return the sibling immediately left of the tokens' category node.

        Returns ``None`` when the category node has no parent or is the first
        child.
        """
        self_category_node = self.get_self_category_node_by_token_indices(token_indices)

        parent = self_category_node.up
        if parent is None:
            return None

        children = parent.get_children()
        for i, child in enumerate(children):
            # Identity comparison, as the node must be this exact object.
            if child is self_category_node:
                return children[i - 1] if i > 0 else None
        return None

    def get_right_sibling_category_node_by_token_indices(self, token_indices):
        """Return the sibling immediately right of the tokens' category node.

        Returns ``None`` when the category node has no parent or is the last
        child.
        """
        self_category_node = self.get_self_category_node_by_token_indices(token_indices)

        parent = self_category_node.up
        if parent is None:
            return None

        children = parent.get_children()
        for i, child in enumerate(children):
            if child is self_category_node:
                return children[i + 1] if i < len(children) - 1 else None
        return None

    def get_parent_category_node_by_token_indices(self, token_indices):
        """Return the parent of the tokens' category node."""
        self_category_node = self.get_self_category_node_by_token_indices(token_indices)
        return self_category_node.up

    def get_arg1_arg2_None_nodes_list(self, Arg1_token_indices, Arg2_token_indices):
        """Label every node and return those whose label is not ``"X"``.

        All nodes start as ``"NONE"``; leaves and their parents become ``"X"``;
        the common ancestors of the two token sets are then labelled
        ``"Arg1_node"`` and ``"Arg2_node"`` (the second assignment wins if
        both resolve to the same node). Labels are stored on the nodes.
        """
        for node in self.tree.traverse():
            node.add_feature("label", "NONE")
        for node in self.tree.get_leaves():
            node.label = "X"
            node.up.label = "X"

        arg1_leaves = self._leaf_nodes_for(Arg1_token_indices)
        self.tree.get_common_ancestor(arg1_leaves).label = "Arg1_node"
        arg2_leaves = self._leaf_nodes_for(Arg2_token_indices)
        self.tree.get_common_ancestor(arg2_leaves).label = "Arg2_node"

        return [node for node in self.tree.traverse() if node.label != "X"]

    def to_newick_format(self, parse_tree):
        """Convert a bracketed parse string into newick text.

        ``,`` and ``:`` are structural characters in newick, so they are
        replaced by ``*COMMA*`` and ``*COLON*`` first.

        Returns:
            The newick string, or ``None`` when the parse string is malformed
            or empty.
        """
        # Replace the "," (and ":") occurring in parse_tree.
        parse_tree = parse_tree.replace(",", "*COMMA*")
        parse_tree = parse_tree.replace(":", "*COLON*")

        tree_list = self.load_syntax_tree(parse_tree)
        if tree_list is None:
            return None
        root = tree_list[1]  # drop the "ROOT" sentinel
        if not root:
            # Empty input leaves an empty token here; nothing to convert.
            return None
        s = self.syntax_tree_to_newick(root)
        s = s.replace(",)", ")")
        if s[-1] == ",":
            s = s[:-1] + ";"
        return s

    def load_syntax_tree(self, raw_text):
        """Parse bracketed text into nested lists on top of a ``"ROOT"`` sentinel.

        Each closed group with more than one element becomes a list; a group
        with exactly one element is replaced by that element.

        Returns:
            The final stack (``["ROOT", ...]``), or ``None`` when a group is
            empty or a ``)`` has no matching ``(``.
        """
        stack = ["ROOT"]
        text = re.sub(r"\(", " ( ", raw_text)
        text = re.sub(r"\)", " ) ", text)
        text = re.sub(r"\n", " ", text)
        text = re.sub(r"\s+", " ", text)
        # NOTE: after the padding above every parenthesis has a space on both
        # sides, so these two anchored patterns cannot match; they are kept
        # unchanged and the outer "( ... )" is unwrapped by the
        # single-element rule below instead.
        text = re.sub(r"^\(\s*\(\s*", "", text)
        text = re.sub(r"\s*\)\s*\)$", "", text)
        for c in text.strip(" ").split(" "):
            if c != ")":
                stack.append(c)
                continue

            node = []
            while True:
                if not stack:
                    # Unbalanced ")": report malformed input like an empty group.
                    return None
                popped = stack.pop()
                if popped == "(":
                    break
                node.append(popped)
            node.reverse()
            if not node:
                return None
            stack.append(node if len(node) > 1 else node[0])
        return stack

    def syntax_tree_to_newick(self, syntax_tree):
        """Serialise ``[label, child, ...]`` as ``"(children)label,"``.

        String children are emitted as they are; list children are serialised
        recursively.
        """
        pieces = [
            self.syntax_tree_to_newick(child) if isinstance(child, list) else child
            for child in syntax_tree[1:]
        ]
        return "(" + "".join(pieces) + ")" + str(syntax_tree[0]) + ","

    # Get the location of an internal node, excluding POS-tag nodes.
    def get_internal_node_location(self, node):
        """Return leaf indices locating ``node``.

        The result holds the index of the first leaf under each of the node's
        first two children (one index if it has a single child). Returns
        ``None`` for a node without children.
        """
        children = node.get_children()
        if not children:
            return None
        leaves = self.tree.get_leaves()
        # Move down to the leaf below each considered child.
        return [leaves.index(self._first_leaf_below(child))
                for child in children[:2]]

    def get_node_by_internal_node_location(self, location):
        """Inverse lookup for :meth:`get_internal_node_location`.

        Several indices resolve to the common ancestor of those leaves; a
        single index resolves to the grandparent of that leaf. An empty
        location yields ``None``.
        """
        if len(location) > 1:
            return self.tree.get_common_ancestor(self._leaf_nodes_for(location))
        if len(location) == 1:
            return self.get_leaf_node_by_token_index(location[0]).up.up
        return None

    def get_right_siblings(self, node):
        """Return the siblings to the right of ``node`` (``[]`` for the root)."""
        if node.is_root():
            return []
        children = node.up.get_children()
        for i, child in enumerate(children):
            if child == node:
                return children[i + 1:]
        return []

    def get_left_siblings(self, node):
        """Return the siblings to the left of ``node`` (``[]`` for the root)."""
        if node.is_root():
            return []
        children = node.up.get_children()
        for i, child in enumerate(children):
            if child == node:
                return children[:i]
        return []

    def get_siblings(self, node):
        """Return every other child of ``node``'s parent (``[]`` for the root)."""
        if node.is_root():
            return []
        return [child for child in node.up.get_children() if child != node]

    def get_relative_position(self, node1, node2):
        """Return where ``node2`` lies relative to ``node1``.

        Returns ``"middle"`` when the nodes are equal or ``node2`` is the root
        or an ancestor of ``node1``; ``"right"`` / ``"left"`` when ``node2`` is
        a descendant of a right / left sibling of ``node1`` or of one of its
        ancestors; ``None`` otherwise.
        """
        if node1 == node2 or node2.is_root():
            return "middle"
        curr = node1
        rsibs = []
        lsibs = []
        while not curr.is_root():
            rsibs.extend(self.get_right_siblings(curr))
            lsibs.extend(self.get_left_siblings(curr))
            curr = curr.up
            if curr == node2:
                return "middle"
        # NOTE(review): only descendants of the collected siblings are
        # checked, so a node2 that is itself one of those siblings falls
        # through to None - confirm whether that is intended.
        for node in rsibs:
            if node2 in node.get_descendants():
                return "right"
        for node in lsibs:
            if node2 in node.get_descendants():
                return "left"
        return None

    def get_node_to_node_path(self, node1, node2):
        """Return the name path from ``node1`` up to the common ancestor and down to ``node2``.

        Upward steps are written ``name>`` and downward steps ``<name``, e.g.
        ``"a>b>ANC<c<d"``.
        """
        common_ancestor = self.tree.get_common_ancestor([node1, node2])

        # node1 -> common_ancestor
        up_names = []
        temp = node1
        while temp != common_ancestor:
            up_names.append(temp.name)
            temp = temp.up

        # common_ancestor -> node2 (collected bottom-up, emitted top-down)
        down_names = []
        temp = node2
        while temp != common_ancestor:
            down_names.append(temp.name)
            temp = temp.up

        return ("".join(name + ">" for name in up_names)
                + common_ancestor.name
                + "".join("<" + name for name in reversed(down_names)))

    # Get the indices of the node's leaves within the syntax tree, i.e. the
    # token indices in the sentence.
    def get_leaves_indices(self, node):
        """Return the sorted tree-wide leaf indices of the leaves under ``node``."""
        leaves = self.tree.get_leaves()
        return sorted(leaves.index(leaf) for leaf in node.get_leaves())
from ete3 import Tree
import sys

# Load the tree given as the first command-line argument.
t = Tree(sys.argv[1])

# Name the internal nodes 0, 1, 2, ... in preorder; leaves keep their names.
i = 0
for node in t.traverse("preorder"):
    if not node.is_leaf():
        node.name = str(i)
        i += 1

# Single-argument print(...) behaves the same as a statement or a function.
print(t.get_ascii(show_internal=True))
t.write(format=8, outfile="with_internal_nodes.tree")
Example #5
0
def run(args):
    """Print (text mode) or draw every tree produced by ``args.src_tree_iterator``.

    In text mode each tree is printed as ASCII art and the function returns
    without importing any drawing classes. Otherwise a ``TreeStyle`` is built
    from the options, the faces described by the module-level ``FACES`` list
    are attached to the matching nodes, and each tree is either rendered to
    ``t<index>.<args.image>`` or shown interactively.

    Args:
        args: parsed command-line namespace. Attributes read here include
            ``text_mode``, ``raxml``, ``src_tree_iterator``,
            ``show_internal_names``, ``show_attributes``, ``face``, ``mode``,
            ``tree_width``, ``as_ncbi``, ``alg``, ``alg_type``,
            ``alg_format``, ``heatmap``, ``bubbles``, ``branch_separation``,
            ``show_support``, ``show_branch_length``, ``force_topology``,
            ``height``, ``width``, ``size_units`` and ``image``.

    Side effects:
        Rebinds the module-level ``FACES`` list and replaces falsy
        ``args.height`` / ``args.width`` with ``None``.
    """
    # Standard-library imports come first: importing ``re`` further down makes
    # it a local name for the whole function, so using it in the text-mode
    # branch before that import raised UnboundLocalError.
    import colorsys
    import random
    import re
    from collections import defaultdict

    raxml_support_re = re.compile(r":(\d+\.\d+)\[(\d+)\]")
    auto_color_re = re.compile(r"auto\(([^)]*)\)")

    def read_raxml_newick(path):
        """Read ``path`` and rewrite ``:len[support]`` as an NHX support tag."""
        with open(path) as handle:
            return raxml_support_re.sub(r":\1[&&NHX:support=\2]", handle.read())

    if args.text_mode:
        from ete3 import Tree
        for tfile in args.src_tree_iterator:
            if args.raxml:
                t = Tree(read_raxml_newick(tfile))
            else:
                t = Tree(tfile)

            print(t.get_ascii(show_internal=args.show_internal_names,
                              attributes=args.show_attributes))
        return

    # Drawing classes are only imported when something is actually drawn.
    from ete3 import (Tree, PhyloTree, TextFace, RectFace, faces, TreeStyle,
                         add_face_to_node, random_color)

    def resolve_auto_color(color_spec, node, face, f2last_seed, f2color):
        """Turn an ``auto(attr)`` colour spec into a per-value random colour.

        Any other spec is returned unchanged. The attribute named inside
        ``auto(...)`` (or the face's own value when empty) is read from
        ``node``; every distinct attribute value keeps the first colour drawn
        for it in ``f2color``.
        """
        auto_m = auto_color_re.search(color_spec)
        if not auto_m:
            return color_spec

        target_attr = auto_m.groups()[0].strip()
        color_keyattr = (target_attr or face["value"]).lstrip('@')
        color_bin = getattr(node, color_keyattr, None)

        last_seed = f2last_seed.setdefault(color_keyattr, random.random())

        seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
        f2last_seed[color_keyattr] = seed

        return f2color.setdefault(color_bin, random_color(h=seed))

    global FACES

    if args.face:
        FACES = parse_faces(args.face)
    else:
        FACES = []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    # A face that already shows @name makes the built-in leaf name redundant.
    for f in FACES:
        if f["value"] == "@name":
            ts.show_leaf_name = False
            break

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(parse_faces(
            ['value:@sci_name, size:10, fstyle:italic',
             'value:@taxid, color:grey, size:6, format:" - %s"',
             'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
             'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
         ]))

    if args.alg:
        FACES.extend(parse_faces(
            ['value:@sequence, size:10, pos:aligned, ftype:%s' %args.alg_type]
         ))

    if args.heatmap:
        FACES.extend(parse_faces(
            ['value:@name, size:10, pos:aligned, ftype:heatmap']
         ))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(parse_faces(
                ['value:@%s, pos:float, ftype:bubble, opacity:0.4' %bubble,
             ]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    ts.layout_fn = lambda x: None

    for tindex, tfile in enumerate(args.src_tree_iterator):
        if args.raxml:
            t = PhyloTree(read_raxml_newick(tfile))
        else:
            t = PhyloTree(tfile)

        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        # Always defined, so a heatmap face without --heatmap draws nothing
        # instead of failing on an unbound name.
        heatmap_data = {}
        if args.heatmap:
            DEFAULT_COLOR_SATURATION = 0.3
            BASE_LIGHTNESS = 0.7
            def gradient_color(value, max_value, saturation=0.5, hue=0.1):
                # NOTE: ``saturation`` is accepted but the fixed
                # DEFAULT_COLOR_SATURATION is what is actually used.
                def rgb2hex(rgb):
                    return '#%02x%02x%02x' % rgb
                def hls2hex(h, l, s):
                    return rgb2hex( tuple([int(x*255) for x in colorsys.hls_to_rgb(h, l, s)]))

                lightness = 1 - (value * BASE_LIGHTNESS) / max_value
                return hls2hex(hue, lightness, DEFAULT_COLOR_SATURATION)

            heatmap_center_value = 0
            heatmap_color_center = "white"
            heatmap_color_up = 0.3
            heatmap_color_down = 0.7
            heatmap_color_missing = "black"

            max_value, min_value = None, None
            with open(args.heatmap) as heatmap_file:
                for line in heatmap_file:
                    # '#COLNAMES' headers, other comments and blank lines
                    # carry no data.
                    if line.startswith('#') or not line.strip():
                        continue

                    fields = line.split('\t')
                    name = fields[0].strip()

                    # Blank cells (including a trailing one that only holds
                    # the newline) are missing values.
                    values = [float(x) if x.strip() else None for x in fields[1:]]

                    # Missing cells must not take part in the range: None
                    # cannot be ordered against floats.
                    present = [v for v in values if v is not None]
                    if present:
                        maxv = max(present)
                        minv = min(present)
                        if max_value is None or maxv > max_value:
                            max_value = maxv
                        if min_value is None or minv < min_value:
                            min_value = minv
                    heatmap_data[name] = values

            if max_value is None:
                # No numeric cell at all: keep the scale arithmetic defined.
                max_value = min_value = heatmap_center_value

            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)

            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)

        # scale the tree
        if not args.height:
            args.height = None
        if not args.width:
            args.width = None

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0
            if len(node.children) == 1:
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            # Next free column per face position for this node.
            ftype_pos = defaultdict(int)

            for f in FACES:
                if not (f['nodetype'] == 'any' or
                        (f['nodetype'] == 'leaf' and node.is_leaf()) or
                        (f['nodetype'] == 'internal' and not node.is_leaf())):
                    continue

                # if node passes face filters
                if not node_matcher(node, f["filters"]):
                    continue

                if f["value"].startswith("@"):
                    fvalue = getattr(node, f["value"][1:], None)
                else:
                    fvalue = f["value"]

                # if node's attribute has content, generate face
                if fvalue is None:
                    continue

                fsize = f["size"]
                fbgcolor = f["bgcolor"]
                fcolor = f['color']

                # Parse color options
                if fcolor:
                    fcolor = resolve_auto_color(fcolor, node, f, f2last_seed, f2color)
                if fbgcolor:
                    fbgcolor = resolve_auto_color(fbgcolor, node, f, f2last_seed, f2color)

                # Reset per face so an unrecognised ftype cannot reuse the
                # face object built for a previous one.
                F = None

                if f["ftype"] == "text":
                    if f.get("format", None):
                        fvalue = f["format"] % fvalue

                    F = TextFace(fvalue,
                                 fsize = fsize,
                                 fgcolor = fcolor or "black",
                                 fstyle = f.get('fstyle', None))

                elif f["ftype"] == "fullseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="seq",
                                           seqtail_format="seq",
                                           height=fsize)
                elif f["ftype"] == "compactseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="compactseq",
                                           seqtail_format="compactseq",
                                           height=fsize)
                elif f["ftype"] == "blockseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="blockseq",
                                           seqtail_format="blockseq",
                                           height=fsize,
                                           fgcolor=fcolor or "slategrey",
                                           bgcolor=fbgcolor or "slategrey",
                                           scale_factor = 1.0)
                    # Already applied to the motif itself; skip the generic
                    # background below.
                    fbgcolor = None
                elif f["ftype"] == "bubble":
                    try:
                        v = float(fvalue)
                    except ValueError:
                        rad = fsize
                    else:
                        rad = fsize * v
                    F = faces.CircleFace(radius=rad, style="sphere",
                                         color=fcolor or "steelblue")

                elif f["ftype"] == "heatmap":
                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                    else:
                        col = f["column"]

                    # One coloured square per heatmap column, added directly.
                    for i, value in enumerate(heatmap_data.get(node.name, [])):
                        ftype_pos[f["pos"]] += 1

                        if value is None:
                            color = heatmap_color_missing
                        elif value > heatmap_center_value:
                            color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_up)
                        elif value < heatmap_center_value:
                            color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_down)
                        else:
                            color = heatmap_color_center
                        node.add_face(RectFace(20, 20, color, color), position="aligned", column=col + i)

                # "profile", "barchart" and "piechart" produce no face.

                # Add the Face
                if F:
                    F.opacity = f['opacity'] or 1.0

                    # Set face general attributes
                    if fbgcolor:
                        F.background.color = fbgcolor

                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                        ftype_pos[f["pos"]] += 1
                    else:
                        col = f["column"]
                    node.add_face(F, column=col, position=f["pos"])

        if args.image:
            t.render("t%d.%s" %(tindex, args.image),
                     tree_style=ts, w=args.width, h=args.height, units=args.size_units)
        else:
            t.show(None, tree_style=ts)
Example #6
0
parser.add_argument(
    '--verbose',
    action='store_true',
    help=('Print information about the outgroup (if any) taxa to standard '
          'error'))

args = parser.parse_args()

# The whole newick text is read from the already-open tree file argument.
tree = Tree(args.treeFile.read())

if args.outgroupRegex:
    from re import compile
    regex = compile(args.outgroupRegex)
    # Leaf names whose beginning matches the outgroup pattern.
    taxa = [leaf.name for leaf in tree.iter_leaves() if regex.match(leaf.name)]

    if taxa:
        ca = tree.get_common_ancestor(taxa)
        if args.verbose:
            print('Taxa for outgroup:', taxa, file=sys.stderr)
            print('Common ancestor:', ca.name, file=sys.stderr)
            print('Common ancestor is tree:', tree == ca, file=sys.stderr)

        # One taxon: root on that leaf. Several taxa spanning the whole tree:
        # fall back to the midpoint. Otherwise root on their common ancestor.
        if len(taxa) == 1:
            outgroup = tree & taxa[0]
        elif ca == tree:
            outgroup = tree.get_midpoint_outgroup()
        else:
            outgroup = tree.get_common_ancestor(taxa)
        tree.set_outgroup(outgroup)

print(tree.get_ascii())
            continue
        if node.support < fNodeSupportCutoff:
            ancestor = node.get_ancestors()[0]
            for child in node.get_children():
                child.dist += node.dist
                ancestor.add_child(child)
            ancestor.remove_child(node)
    return tree


# Restructure the tree in place around nodes whose support is below the
# cutoff; the function's return value is not used here.
removeUnsupported(tree, fSupportCutoff)

# Dump the resulting tree as ASCII art, followed by a separator line.
fileOut.write(
    'Input tree with unsupported nodes removed. In ascii format, having problems writing in newick:\n'
)
fileOut.write(tree.get_ascii() + '\n')
fileOut.write('=====\n')
# Flush now so this section is in the file before any later processing.
fileOut.flush()


## Progressively assign a compound taxonomy going from the leaves to the root.
def setTaxonomy(node):
    if node.is_leaf():
        return node
    node.taxonomy = {}
    lChildren = node.get_children()
    fChildrenTaxFreqSum = 0  ## typically the number of children, but some taxons may be masked
    for child in lChildren:
        setTaxonomy(child)
        for (sTaxon, fFreq) in child.taxonomy.items():
            fChildrenTaxFreqSum += fFreq
Example #8
0
    return score


def logLikelihood(msafile, treefile):
    """Run IQ-TREE on an alignment and a tree-topology file.

    Executes ``iqtree -s <msafile> -z <treefile>`` and waits for it to finish.
    The command is passed as an argument list rather than a shell string, so
    paths containing spaces or shell metacharacters are handed to IQ-TREE
    unchanged and cannot be interpreted by a shell.

    Args:
        msafile: path to the multiple sequence alignment.
        treefile: path to the file with the tree topology to evaluate.

    Returns:
        None. IQ-TREE's exit status is ignored; a failure to start the
        program is reported on standard error instead of being raised.
    """
    import subprocess
    import sys

    cmd = ["iqtree", "-s", msafile, "-z", treefile]
    try:
        subprocess.call(cmd)
    except OSError as err:
        # e.g. iqtree is not installed; keep the previous non-raising behaviour.
        sys.stderr.write("Could not run iqtree: %s\n" % err)


if __name__ == '__main__':
    # Manual pruning check, disabled by default: flip the condition to run it.
    if False:
        t = Tree()
        t.populate(10)

        # Single-argument print(...) works as a statement and as a function.
        print(t.get_ascii())

        # Keep a random subset of 7 of the 10 leaf names.
        keep = [leaf.name for leaf in t]
        np.random.shuffle(keep)
        keep = keep[:7]

        #keep = ['A','B','C','D','G','H','J']

        # Give every unnamed node a numeric name so it can be told apart.
        i = 1
        for node in t.traverse():
            if node.name == '':
                node.name = str(i)
                i += 1

        print(keep)
        print(prune(t, keep))
Example #9
0
import string

# Host tree (format 8) and guest tree with branch lengths (format 1).
host = Tree("(C,(A,B)D)E;", format=8)
guest = Tree(
    "(((a:0.2,r:0.2)p:0.3,b:0.5)e:0.5,((c:0.4,s:0.4)q:0.3,d:0.1)f:0.3)z;",
    format=1)

# Guest nodes assigned to each host node (``tree & name`` looks a node up by name).
nodemap = {
    host & "A": [guest & 'a', guest & 's'],
    host & 'B': [guest & 'c', guest & 'r'],
    host & 'C': [guest & 'b', guest & 'd'],
    host & 'D': [guest & 'p', guest & 'q'],
    host & 'E': [guest & 'e', guest & 'f', guest & 'z']
}

# print("") emits the intended blank line whether print is a statement or a
# function; a bare ``print`` is a no-op expression on Python 3.
print("HOST TREE (W/ NAMES):")
print(host.get_ascii(attributes=['name']))
print("")
print("GUEST TREE (W/ NAMES):")
print(guest.get_ascii(attributes=['name']))
print("")
print("GUEST TREE (W/ DISTANCES):")
print(guest.get_ascii(attributes=['dist']))
print("")

# Tag every guest node with an event: 'z' is a duplication, 'd' a loss and
# everything else a speciation.
for node in guest.traverse():
    if node.name == 'z':
        node.add_feature('event', "DUPLICATION")
    elif node.name == 'd':
        node.add_feature('event', "LOSS")
    else:
        node.add_feature('event', "SPECIATION")
Example #10
0
    for i, haplogroup in enumerate(row):
        if not pd.isnull(haplogroup):
            if not pd.isnull(df.iloc[index, i + 1]):
                annotation = df.iloc[index, i + 1].strip()
                snp = annotation.split('  ')
                #print index, i, haplogroup, snp
                #print t.get_ascii(show_internal=True, attributes=["name", "x", "y"])
                uppers = t.search_nodes(x=i - 1)
                #print uppers
                node = uppers[-1].add_child(name=haplogroup)
                node.add_feature('snp', snp)
                node.add_feature('x', i)
                node.add_feature('y', index)
                break
            else:
                haplogroup = 'unknown'
                annotation = df.iloc[index, i].strip()
                snp = annotation.split('  ')
                #print index, i-1, haplogroup, snp
                uppers = t.search_nodes(x=i - 2)
                #print uppers
                node = uppers[-1].add_child(name=haplogroup + str(index))
                node.add_feature('snp', snp)
                node.add_feature('x', i - 1)
                node.add_feature('y', index)
                break

# Single-argument print(...) works as a statement and as a function.
print(t.get_ascii(show_internal=True, attributes=["name", "position"]))
#t.show()
# Export the tree as newick (format 1) with each node's "snp" feature attached.
t.write(format=1, features=["snp"], outfile="mtDNA_EntireTree.nw")
Example #11
0
# (parent index, child index) pairs, attached in exactly this order.
_DEND_EDGES = (
    (176, 180),
    (180, 181),
    (180, 182),
    (175, 183),
    (183, 184),
    (184, 185),
    (184, 186),
    (186, 187),
    (186, 188),
    (183, 189),
    (189, 190),
    (189, 191),
    (135, 192),
    (192, 193),
    (193, 194),
    (194, 195),
    (194, 196),
    (193, 197),
    (192, 198),
)
for _parent, _child in _DEND_EDGES:
    dend[_parent].add_child(dend[_child])

# Show the assembled structure, internal node names included.
print(soma.get_ascii(show_internal=True))


def my_layout(node):
    """Layout function: draw the node's name above its branch in column 0."""
    add_face_to_node(
        TextFace(node.name, tight_text=True),
        node,
        column=0,
        position="branch-top",
    )


# Have the tree style call my_layout for node rendering.
ts.layout_fn = my_layout
# Open the interactive viewer only when showQt is set.
if showQt:
    soma.show(tree_style=ts)
class ASRTree:
    #Attributes
    __tree = None
    __charStateChanges = 0
    __numOfTaxa = 0
    __anadromyLookUp = dict(
    )  #Dictionary matching FASTA file names (key) to a list of taxa names and character states
    scientificIndex = 0
    commonIndex = 1
    stateIndex = 2

    #Constructor
    def __init__(self):
        """Create an instance that has no tree loaded yet."""
        # runMaxParsimony() treats a None tree as "not imported".
        self.__tree = None

#Public Methods
#-----------------------------buildTree-------------------------------------
# Description: Builds newick tree from an aligned and filter FASTA file.
#---------------------------------------------------------------------------

    def buildTree(self, path):
        raxFile = open(path, "r")
        if raxFile.mode == "r":
            contents = raxFile.read()
            self.__tree = Tree(contents)
            print("\nRAxML tree imported successully.")
        else:
            print(
                "\nRAxML tree failed to import successfully. Please check the file path and try again."
            )

    #end buildTree

    #-----------------------runMaxParsimony-------------------------------------
    # Description: Calls private functions for Fitch's algorithm of maximum
    #              parsimony.
    #---------------------------------------------------------------------------
    def runMaxParsimony(
        self
    ):  #Calls private functions for Fitch's algorithm of maximum parsimony
        if self.__tree is None:
            print(
                "\n****************Error****************\nTree has not been imported. Please run buildTree method first."
            )
        else:
            self.__tree.resolve_polytomy(
            )  #Transform tree to bifurcating - does nothing if already bifurcating
            self.__downPass()
            self.__upPass()
            self.__findCharStateChanges()

    #end runMaxParsimony

    #-----------------------------getNumOfTaxa----------------------------------
    # Description: Returns number of taxa.
    #---------------------------------------------------------------------------
    def getNumOfTaxa(self):  #Returns the number of taxa
        return self.__numOfTaxa

    #end getNumOfTaxa

    #--------------------------getCharStateChanges------------------------------
    # Description: Returns number of character state changes.
    #---------------------------------------------------------------------------
    def getCharStateChanges(
            self):  #Returns the number of character state changes
        return self.__charStateChanges

    #end of getCharStateChanges

    #---------------------------importLookUp------------------------------------
    # Description: Imports the look-up file for assigning character state
    #              changes and taxa names.
    #---------------------------------------------------------------------------
    def importLookUp(
        self, path
    ):  #Imports the look-up file for assigning character state changes and taxa names
        importFile = xlrd.open_workbook(path)
        file = importFile.sheet_by_index(0)
        values = list()  #Local list for holding cell row information

        for row in range(
                1, file.nrows):  #Nested loops to cover entire spreadsheet
            for col in range(
                    file.ncols
            ):  #Creates a list of the scientific names, common names and character states for each fish in file
                if col == 0:
                    fileName = file.cell_value(row, col)
                    values.append(fileName)
                elif col == 1:
                    scientificName = file.cell_value(row, col)
                    values.append(scientificName)
                elif col == 2:
                    commonName = file.cell_value(row, col)
                    values.append(commonName)
                else:
                    anadromous = int(file.cell_value(row, col))
                    values.append(anadromous)
                    self.__anadromyLookUp[values[0]] = values[1:]
                    values.clear()

        __numOfTaxa = len(self.__anadromyLookUp)

    #end importLookUp

    #-----------------------------showTree--------------------------------------
    # Description: Displays tree in console and opens an external window to
    #              interact with tree and see branch length.
    #---------------------------------------------------------------------------
    def showTree(self):
        print(
            self.__tree.get_ascii(attributes=["name", "anadromy"],
                                  show_internal=True))
        self.__tree.show()

    #end showTree

    #-----------------------------toString--------------------------------------
    # Description: Prints to console number of taxa and their names, as well as
    #              the number of character state changes.
    #---------------------------------------------------------------------------
    def toString(self):
        if self.__tree == None or self.__charStateChanges == 0:
            return "\n****************Error****************\nTree not constructed, or maximum parsimony not yet run. Please run methods and try again."

        count = 0
        asrInfo = "\n\t\tTaxa\n"
        for key in self.__anadromyLookUp:
            count += 1
            asrInfo += str(count) + ": " + self.__anadromyLookUp[key][
                self.scientificIndex]
            asrInfo += " (" + self.__anadromyLookUp[key][
                self.commonIndex] + ")\n"
        asrInfo += "\nCharacter State Changes: " + str(self.__charStateChanges)
        return asrInfo

    #end toString

#Private Methods
#----------------------------__downPass-------------------------------------
# Description: Private method to perform down-pass to assign character state
#              to tips and internal nodes.
#---------------------------------------------------------------------------

    def __downPass(self):
        for node in self.__tree.traverse("postorder"):
            #Check for internal nodes that have been visted - marked as "Ancestor"
            if node.name is "Ancestor":
                if not node.is_root():
                    if node.up.name is "":
                        node.up.add_feature("anadromy", node.anadromy)
                        node.up.name = "Ancestor"

                    elif node.anadromy.issubset(
                            node.up.anadromy) or node.anadromy.issuperset(
                                node.up.anadromy):
                        node.up.add_feature(
                            "anadromy",
                            node.up.anadromy.intersection(node.anadromy))

                    else:
                        node.up.add_feature(
                            "anadromy", node.up.anadromy.union(node.anadromy))
            else:
                if node.name in self.__anadromyLookUp:
                    isAnadromous = set(
                        [self.__anadromyLookUp[node.name][self.stateIndex]])
                    node.add_feature("anadromy", isAnadromous)

                    if node.up.name is "":  #If the internal node is not yet named, it is unvisited
                        node.up.add_feature("anadromy", isAnadromous)
                        node.up.name = "Ancestor"  #Tag internal nodes as Ancestor to easily identify visited nodes

                    elif self.__anadromyLookUp[node.name][
                            self.stateIndex] in node.up.anadromy:
                        node.up.add_feature(
                            "anadromy",
                            node.anadromy.intersection(node.up.anadromy))

                    else:
                        node.up.add_feature(
                            "anadromy", node.up.anadromy.union(node.anadromy))
                node.name = self.__anadromyLookUp[node.name][self.commonIndex]

    #end __downPass

    #-----------------------------__upPass--------------------------------------
    # Description: Private method to perform up-pass to clear any union in
    #              ancestor nodes by sinding the intersection of the
    #              ancestor and its parent node.
    #---------------------------------------------------------------------------
    def __upPass(self):  #Up-pass to clear any union in ancestor nodes
        for node in self.__tree.traverse("preorder"):
            if node.name is "Ancestor":
                if not node.is_root():
                    if len(node.anadromy) > 1:
                        node.add_feature(
                            "anadromy",
                            node.anadromy.intersection(node.up.anadromy))

    #end __upPass

    #----------------------__findCharStateChanges-------------------------------
    # Description: Private function to find the number of character states
    #              changes in the tree.
    #---------------------------------------------------------------------------
    def __findCharStateChanges(self):
        characterState = 0
        for node in self.__tree.traverse("preorder"):
            if node.is_root():
                characterState = next(iter(node.anadromy))
            else:
                if not (characterState in node.anadromy):
                    self.__charStateChanges += 1
Example #13
0
# Report the two scores for `species_in_tree` against `common_ancestor`
# (Python 2 print statements throughout).
print "num species in common ancestor: ", len(common_ancestor)

# Score 1 is computed by calc_score1; the two lengths are printed next to it.
print "score 1: ", len(species_in_tree), "\\", len(
    common_ancestor), "= ", calc_score1(common_ancestor, species_in_tree)
print "******************************************************"

# calc_score2 returns the score together with a collection of nodes.
(score, monophyly_nodes) = calc_score2(common_ancestor, species_in_tree)

# Keep internal nodes only, then pick the one with the largest `include`
# value. The first node is the fallback when no node has include > 0.
# NOTE(review): monos[0] raises IndexError when there is no internal node.
monos = [node for node in monophyly_nodes if not node.is_leaf()]
max_node = monos[0]
max_include = 0
for node in monos:
    if node.include > max_include:
        max_include = node.include
        max_node = node

print "******************************************************"
print "biggest monophyly group: ", len(max_node)
print "score 2: ", len(max_node), "\\", len(species_in_tree), "= ", score
print "******************************************************"
print max_node

# NOTE(review): this first value of `a` is overwritten below without being used.
a = [x.name for x in max_node.get_leaves()]
# Save the ASCII rendering of the whole tree to the file "out".
out = tree.get_ascii(show_internal=True)
with open("out", "w") as f:
    f.write(out)
# Write the tree to "new_tree.nw" and print whatever write() returns.
print tree.write(format=1, outfile="new_tree.nw")

# Leaves under the common ancestor whose name is in species_in_tree.
a = [x for x in common_ancestor.get_leaves() if x.name in species_in_tree]
print a
Example #14
0
import os, uuid
from ete3 import Tree

# Directory scanned for ".tre" files; the filtered trees are written to its
# "Chunks_90" subdirectory under the same file name.
chunks_dir = "/Users/David/Downloads/Chunks"

for file in os.listdir(chunks_dir):
    if file.endswith(".tre"):
        outname = os.path.join(chunks_dir, "Chunks_90", str(file))
        # os.listdir() yields bare file names, so the directory has to be
        # joined back on; the bare name only resolved when the script was
        # started from inside chunks_dir.
        t = Tree(os.path.join(chunks_dir, file), format=0)

        # Tree before filtering (single-argument print works on py2 and py3).
        print(t.get_ascii(attributes=['support', 'name']))

        # Delete every internal node whose support value is 0.9 or lower.
        for node in t.get_descendants():
            if not node.is_leaf() and node.support <= 0.9:
                node.delete()

        # Tree after filtering.
        print(t.get_ascii(attributes=['support', 'name']))

        t.write(format=0, outfile=outname)
Example #15
0
def _load_examples(path):
    """Read whitespace-separated examples from ``path`` as rows of 0/1 ints.

    Every token equal to '1' becomes 1 and any other token becomes 0. The
    file is closed when reading ends, including when it ends with an error.
    """
    with open(path, "r") as handle:
        return [[1 if token == '1' else 0 for token in line.split()]
                for line in handle]


def _print_ascii_tree(newick):
    """Print ``newick`` as ASCII art, doing nothing if ``Tree`` is undefined."""
    try:
        t = Tree(newick, format=1)
        print(t.get_ascii(show_internal=True))
    except NameError:
        # Deliberate best effort, kept from the original: presumably Tree
        # comes from an optional import, so the drawing is simply skipped
        # when the name is not defined.
        pass


def main():
    """Train two decision trees, print their accuracy and visualise them.

    One tree is trained with the regular importance measure and one with
    the random one (third argument of ``train`` set to True). Both are
    evaluated on the test examples, printed in Newick form and, where
    possible, drawn as ASCII art.
    """
    random.seed()
    #Read the examples from the files
    trainingExamples = _load_examples("data/training.txt")
    testExamples = _load_examples("data/test.txt")
    #One attribute index per column of an example, except the last column
    attributes = list(range(len(trainingExamples[0]) - 1))
    #Create deep copy in order for training with random Importance
    random_attributes = copy.deepcopy(attributes)
    #Train two trees, one with regular Importance and one with random Importance
    tree = train(trainingExamples, attributes)
    random_tree = train(trainingExamples, random_attributes, True)
    #Test the trees
    accuracy = test(tree, testExamples)
    random_accuracy = test(random_tree, testExamples)
    print(accuracy)
    print(random_accuracy)
    #Visualise the trees
    for learned_tree in (tree, random_tree):
        #Replace the last character of the serialised tree by the ';' terminator
        newick = print_tree(learned_tree)[:-1] + ';'
        print(newick)
        _print_ascii_tree(newick)
Example #16
0
# Fit a two-cluster k-means model on the values of `dis`.
# NOTE(review): `precompute_distances` is not accepted by recent scikit-learn
# releases - confirm the installed version.
kmeans = KMeans(n_clusters=2,
                random_state=0,
                precompute_distances=True,
                tol=1e-10).fit(dis.values)
# Bare expression: it has no effect when this runs as a script.
kmeans.labels_

# Map "<converted genome id>_<original id>" to a one-item list holding the
# cluster label of that row as a string.
id2info = defaultdict(list)
for idx, id in enumerate(dis.index):
    new_name = convert_genome_ID_rev(id.split('_')[0]) + '_' + id
    id2info[new_name] = [str(kmeans.labels_[idx])]
# Wildcard import in the middle of the script; presumably this is what
# provides to_binary_shape - confirm.
from api_tools.itol_func import *

# Build the annotation text for the two labels and save it.
text = to_binary_shape(id2info, {'1': {}, '0': {}})
with open('../itol_txt/separate_tmp.txt', 'w') as f1:
    f1.write(text)

t = PhyloTree(intree)
# t.set_outgroup(t.get_midpoint_outgroup())
# The species of a node is derived from the part of its name before the
# first underscore.
t.set_species_naming_function(
    lambda node: convert_genome_ID_rev(node.name.split('_')[0]))

print(t.get_ascii(attributes=["name", "species"], show_internal=False))
t2 = t.collapse_lineage_specific_expansions()

# Materialise the returned trees in a list, then print the leaf count of each.
ntrees, ndups, sptrees = t2.get_speciation_trees()
sptrees = list(sptrees)
print("Found %d species trees and %d duplication nodes" % (ntrees, ndups))
for spt in sptrees:
    print(len(spt.get_leaf_names()))
    This function works with tree files in newick format 
    '''
    t = Tree(treefile)
    branchLenDist = []
    for n in t.traverse():
        if n.dist > 0:
            branchLenDist.append(n.dist)
    df = pd.DataFrame({'branchLen':branchLenDist})
    df['dataset'] = os.path.basename(os.path.dirname(treefile))
    return df


if __name__ == '__main__':
    # Explain the expected directory layout and draw it as a small tree.
    print("In order to run this script all files must have the same name and extension and they should be saved in directories that have the datasets name. Please see an example below")
    diagram = Tree("((----->treeFileName.treefile)----->dataset1Dir, (----->treeFileName.treefile)----->dataset2Dir, (----->treeFileName.treefile)----->dataset3Dir)rootDir;", format=1)
    print(diagram.get_ascii(show_internal=True))
    rootDir = '/data/Suha/GTR_parameters_dist' #the rootDir name to the directories that contain the tree files
    treeFileName = 'branches.treefile' #the name of the tree file with .treefile extension (any newick format file can be used)
    branchLenFile = 'BranchLen.csv' #the name of the branch lengths output file with .csv extension
    proceed = input("do you want to proceed? Y/N\n")

    if proceed == 'Y':

        # Collect one data frame per tree file and concatenate once.
        # DataFrame.append was removed in pandas 2.0, and appending inside
        # the loop copied the accumulated frame on every iteration.
        frames = []

        for DirName, subdirList, fileList in os.walk(rootDir):
            if treeFileName in fileList:
                treeFile = os.path.join(DirName, treeFileName)
                frames.append(branchLen(treeFile))

        # pd.concat rejects an empty list, so fall back to the empty frame
        # that was written before when no tree file was found.
        df = pd.concat(frames) if frames else pd.DataFrame()

        df.to_csv(os.path.join(rootDir, branchLenFile))
    '--verbose',
    action='store_true',
    help=('Print information about the outgroup (if any) taxa to standard '
          'error'))

args = parser.parse_args()

# The tree is built from the full text read from the tree file argument.
tree = Tree(args.treeFile.read())

if args.outgroupRegex:
    from re import compile
    regex = compile(args.outgroupRegex)
    # Names of the leaves whose name matches the outgroup pattern.
    taxa = []
    for leaf in tree.iter_leaves():
        if regex.match(leaf.name):
            taxa.append(leaf.name)

    if taxa:
        ca = tree.get_common_ancestor(taxa)
        if args.verbose:
            report = (
                ('Taxa for outgroup:', taxa),
                ('Common ancestor:', ca.name),
                ('Common ancestor is tree:', tree == ca),
            )
            for label, value in report:
                print(label, value, file=sys.stderr)

        # Choose the outgroup: the single matching leaf, the midpoint
        # outgroup when the common ancestor is the tree itself, or the
        # common ancestor of the matching leaves otherwise.
        if len(taxa) == 1:
            outgroup = tree & taxa[0]
        elif ca == tree:
            outgroup = tree.get_midpoint_outgroup()
        else:
            outgroup = tree.get_common_ancestor(taxa)
        tree.set_outgroup(outgroup)

print(tree.get_ascii())
Example #19
0
def phylogenetic_tree_to_cluster_format(tree, pairwise_estimates):
    """
    Turn a phylogenetic tree into a ``fastcluster``-style 'cluster' array.

    Each row describes one internal node: the first two columns are the ids
    of the two nodes it joins, the third is the distance between them
    (computed from the branch lengths) and the fourth is the number of
    leaves below the node. The tree is midpoint-rooted first.

    Example of the data structure (output from ``fastcluster``)::

        [[   3.            7.            4.26269776    2.        ]
         [   0.            5.           26.75703595    2.        ]
         [   2.            8.           56.16007598    2.        ]
         [   9.           12.           78.91813609    3.        ]
         [   1.           11.           87.91756528    3.        ]
         [   4.            6.           93.04790855    2.        ]
         [  14.           15.          114.71302639    5.        ]
         [  13.           16.          137.94616373    8.        ]
         [  10.           17.          157.29055403   10.        ]]

    :param tree: newick tree file
    :param pairwise_estimates: pairwise Ks estimates data frame (pandas)
        (only the index is used)
    :return: clustering data structure, pairwise distances dictionary
    """
    # Position of every index label of the data frame.
    id_map = {}
    for position in range(len(pairwise_estimates)):
        id_map[pairwise_estimates.index[position]] = position

    t = Tree(tree)

    # Midpoint rooting; there is no midpoint outgroup with only two leaves,
    # in which case the first leaf is used instead.
    midpoint = t.get_midpoint_outgroup()
    if not midpoint:
        midpoint = list(t.get_leaves())[0]
    t.set_outgroup(midpoint)
    logging.debug('Tree after rooting:\n{}'.format(t.get_ascii()))

    # Internal nodes are numbered upwards from the number of mapped leaves.
    next_internal_id = len(id_map)
    linkage_rows = []
    pairwise_distances = {}
    for node in t.traverse('postorder'):
        if not node.is_leaf():
            node.name = next_internal_id
            next_internal_id += 1
            children = node.get_children()
            first_child = children[0]
            second_child = children[1]
            linkage_rows.append([
                first_child.name,
                second_child.name,
                first_child.get_distance(second_child),
                len(node.get_leaves()),
            ])
            continue

        # Leaf: rename it to its numeric id and register the identity
        # mapping, so leaves renamed earlier still resolve through id_map
        # in the lookup below.
        node.name = id_map[node.name]
        id_map[node.name] = node.name
        distances_from_leaf = {}
        for other in t.get_leaves():
            distances_from_leaf[id_map[other.name]] = node.get_distance(other)
        pairwise_distances[node.name] = distances_from_leaf
    return np.array(linkage_rows), pairwise_distances
Example #20
0
        node_cur.add_face(nameFace, column=1, position="branch-bottom")

    else:  # for child
        #### search the parent node by parent_id
        node_cur = root.search_nodes(name=str(parent_id))
        # there should be only one parent node
        if len(node_cur) == 1:
            #### set child with its id
            node_cur = node_cur[0].add_child(name=str(cell_id))
            #### set duration
            node_cur.add_feature("dist", time_duration)

            # set node style
            node_cur.set_style(ns)

            # set node name to face
            nameFace = TextFace(node_cur.name)
            nameFace.fgcolor = "white"
            nameFace.fsize = 15
            nameFace.background.color = "green"
            node_cur.add_face(nameFace, column=1, position="branch-bottom")
        else:
            raise RuntimeError("the cell id should be unique!")

#node = root.search_nodes(name=str(5))
#node[0].add_feature("dist", 1.5)

# Print the tree hanging from `root` as ASCII art (Python 2 print statement).
print root.get_ascii()

# Call show() on the tree with the style object `ts`.
root.show(tree_style=ts)
        percstr[node.name] = str(sum_parent) + "%"

# Attach the percentage value and its text form to every internal node with
# level == 1; the string "none" is stored when the node name has no entry.
for node in t2.traverse():
    if not node.is_leaf() and node.level == 1:
        node.add_features(perc=percs.get(node.name, "none"),
                          percstring=percstr.get(node.name, "none"))

# Sum the `perc` values of the direct children of t2, rounded to 2 decimals.
# NOTE(review): a child without `perc`, or one holding the "none" fallback
# string, would make this sum fail - confirm every child has a number.
sum_title = 0
for child in t2.children:
    sum_title += child.perc
sum_title = round(sum_title, 2)
# Store the total on every node whose name equals `title`.
for node in t2.traverse():
    if node.name == title:
        node.add_features(perc=sum_title, percstring=str(sum_title) + "%")

# Python 2 print statement.
print t2.get_ascii(attributes=["name", "percstring", "buff"])

ts = TreeStyle()
# provide a list of layout functions, instead of a single one
ts.layout_fn = [size_internal]

ts.show_leaf_name = False

ts.branch_vertical_margin = 10
# Title face added in column 0 of the style's title area.
ts.title.add_face(TextFace("Graphical Representation of " + title, fsize=20),
                  column=0)

# Node style settings: shape, size and foreground colour.
ns = NodeStyle()
ns["shape"] = "sphere"
ns["size"] = 1
ns["fgcolor"] = "darkred"
Example #22
0
def visualizeTree(postOrderStr):
    """Print the tree described by ``postOrderStr`` as ASCII art.

    The string is parsed with ``format=8`` and drawn with internal nodes
    shown.
    """
    print(Tree(postOrderStr, format=8).get_ascii(show_internal=True))
class ASRTree:
    #Attributes
    __tree = None  #Actual tree
    __sim_tree = None  #Simulation tree
    ____transition_prob_anad = None
    __transition_prob_aqp3 = None
    __sim_effect_sizes = []  #List containing simulation effect sizes
    __p_value_count = 0  #Number of times an effect size is simulated => actual
    __effect_size = 0  #Actual effect size of model
    __num_of_branches = __num_anad = __num_aqp3 = __num_anad_and_aqp3 = __num_taxa = __p_value = 0
    __anadromy_lookup = dict(
    )  #Dictionary matching FASTA file names (key) to a list of taxa names and character states
    SCIENTIFIC_INDEX = 0
    COMMON_INDEX = 1
    ANAD_INDEX = 2
    AQP3_INDEX = 3
    EPSILON = 0.00000000000000000001  #Number being added to anadromy/aqp3 variables to avoid division by 0 in effect size

    #Public Methods

    #--------------------------constructor--------------------------------------
    # Description: Constructs an ASRTree, sets the default value for the tree,
    #              and creates the 2D lists for the two transition matrices,
    #              setting their initial values to 0.
    #---------------------------------------------------------------------------
    def __init__(self):
        self.__tree = None
        self.____transition_prob_anad = [[0.0 for x in range(2)]
                                         for y in range(2)]
        self.__transition_prob_aqp3 = [[0.0 for x in range(2)]
                                       for y in range(2)]

    #end constructor

    #-----------------------------build_tree------------------------------------
    # Description: Builds phylogenetic tree from newick tree file in RAxML result.
    #---------------------------------------------------------------------------
    def build_tree(self, path):
        """Build the tree from the Newick text stored in the file at ``path``.

        Errors raised while opening or reading the file, or while parsing
        its contents, propagate to the caller. The former "failed to import"
        branch is gone because it could never run: a file opened with mode
        "r" always reports mode "r".
        """
        #The with-block closes the handle even if reading fails; the handle
        #was previously never closed.
        with open(path, "r") as rax_file:
            contents = rax_file.read()
        self.__tree = Tree(contents)
        print("\nRAxML tree imported successully.")

    #end build_tree

    #-----------------------run_max_parsimony-----------------------------------
    # Description: Calls private functions for Fitch's algorithm of maximum
    #              parsimony.
    #---------------------------------------------------------------------------
    def run_max_parsimony(
        self
    ):  #Calls private functions for Fitch's algorithm of maximum parsimony
        if self.__tree is None:
            print(
                "\n****************Error****************\nTree has not been imported. Please run build_tree method first."
            )
        else:
            self.__tree.resolve_polytomy(
            )  #Transform tree to bifurcating - does nothing if already bifurcating
            self.__down_pass()
            self.__up_pass()
            self.__clean_tree()
            self.__find_char_states()
            self.__find_transition_prob()
            self.__effect_size = self.calc_effect_size(self.__num_anad + self.EPSILON,\
            self.__num_aqp3 + self.EPSILON, self.__num_anad_and_aqp3 + self.EPSILON)

    #end run_max_parsimony

    #-----------------------------get_num_taxa----------------------------------
    # Description: Returns number of taxa.
    #---------------------------------------------------------------------------
    def get_num_taxa(self):
        """Return the stored number of taxa (the ``__num_taxa`` attribute)."""
        return self.__num_taxa

    #end get_num_taxa

    #-----------------------------get_p_value-----------------------------------
    # Description: Returns the P-Value of the hypothesis test.
    #---------------------------------------------------------------------------
    def get_p_value(self):
        """Return the stored p-value (set by ``monte_carlo_sim``)."""
        return self.__p_value

    #end get_p_value

    #--------------------------import_lookup------------------------------------
    # Description: Imports the look-up file for assigning character state
    #              changes and taxa names.
    #---------------------------------------------------------------------------
    def import_lookup(self, path):
        """Import the look-up workbook at ``path``.

        The first sheet is read starting at row 1 (row 0 is skipped,
        presumably a header - confirm). Columns 0 to 2 are stored as read
        (file name, scientific name, common name) and column 3 is stored as
        an ``int`` (anadromy state). Every later column is also converted to
        ``int`` (AQP3 state) and closes the current entry, which is stored
        under the file name from column 0.
        """
        workbook = xlrd.open_workbook(path)
        sheet = workbook.sheet_by_index(0)
        values = []  #Cell values of the entry currently being read

        for row in range(1, sheet.nrows):
            for col in range(sheet.ncols):
                cell = sheet.cell_value(row, col)
                if col <= 2:
                    values.append(cell)
                elif col == 3:
                    values.append(int(cell))
                else:
                    values.append(int(cell))
                    self.__anadromy_lookup[values[0]] = values[1:]
                    values.clear()

        #Store the count on the instance. The previous code assigned a local
        #variable here, so get_num_taxa() always returned 0.
        self.__num_taxa = len(self.__anadromy_lookup)

    #end import_lookup

    #----------------------------show_tree--------------------------------------
    # Description: Displays tree in console and opens an external window to
    #              interact with tree and see branch length.
    #---------------------------------------------------------------------------
    def show_tree(self):
        print(
            self.__tree.get_ascii(attributes=["name", "anadromy", "aqp3"],
                                  show_internal=True))
        self.__tree.show()

    #end show_tree

    #----------------------------to_string--------------------------------------
    # Description: Returns a string listing the taxa and their names, as well
    #              as the number of character state changes.
    #---------------------------------------------------------------------------
    def to_string(self):
        if self.__tree == None or self.__effect_size == 0:
            return "\n****************Error****************\nTree not constructed,\
             or maximum parsimony not yet run. Please run methods and try again."

        count = 0
        asr_info = "\n\t\tTaxa\n"
        for key in self.__anadromy_lookup:
            count += 1
            asr_info += str(count) + ": " + self.__anadromy_lookup[key][
                self.SCIENTIFIC_INDEX]
            asr_info += " (" + self.__anadromy_lookup[key][
                self.COMMON_INDEX] + ")\n"
        asr_info += "\nAnadromy Character State Changes: " + str(
            self.__num_anad)
        asr_info += "\nAQP3 Character State Changes: " + str(self.__num_aqp3)
        return asr_info

    #end to_string

    #------------------------calc_effect_size-----------------------------------
    # Description: Public method that calculates the effect size of the ASRTree.
    #---------------------------------------------------------------------------
    def calc_effect_size(self, numOfAnad, numOfAqp3, numAnadAndAqp3):
        effect_size = ((numAnadAndAqp3 / self.__num_of_branches) /
                       ((numOfAnad / self.__num_of_branches) *
                        (numOfAqp3 / self.__num_of_branches)))
        return effect_size

    #end calc_effect_size

    #-------------------------monte_carlo_sim-----------------------------------
    # Description: Public method to run n number of Monte Carlo simulations
    #              in order to test the hypothesis. Each simulation checks
    #              the ancestral node in the tree, then refers to the transition
    #              rate matrix for the probability of getting the same or a
    #              different character state.
    #---------------------------------------------------------------------------
    def monte_carlo_sim(self, num_sims):
        """Run ``num_sims`` simulations and store the resulting p-value.

        Each simulation walks the simulation tree in pre-order and assigns
        an "anadromy" and an "aqp3" state (0 or 1) to every non-root node by
        comparing a random integer with an entry of the matching transition
        matrix scaled by 1000. The nodes assigned state 1 are tallied per
        trait and for both traits together, the effect size of the
        simulation is computed with ``calc_effect_size``, and the p-value is
        the fraction of simulations whose effect size is at least the
        stored actual effect size.
        """
        #Checks if there already is a simulation tree to avoid unncessary copies
        self.__p_value_count = 0  #Initialize back to 0
        self.__sim_effect_sizes.clear()  #Initialize back to empty
        #The copy is made once and then reused by every simulation and call
        if self.__sim_tree is None:
            self.__sim_tree = self.__tree.copy()
        for sim in range(num_sims):
            #Set values of each count back to the EPSILON value to avoid
            #division by 0 in the effect size
            aqp3_count = self.EPSILON
            anad_count = self.EPSILON
            anad_aqp3_count = self.EPSILON
            for node in self.__sim_tree.traverse("preorder"):
                #Two draws are made for every node, the root included.
                #NOTE(review): randint includes both ends, so draws range
                #over 0..1001 while the probabilities are scaled by 1000.
                rand_num_1 = random.randint(0, 1001)
                rand_num_2 = random.randint(0, 1001)
                if not node.is_root():
                    #Check each ancestor's character state, and roll a random
                    #number against the probability of going from that state to
                    #the same or a different state based on transition matrix
                    #and assign that character state. Tally all gains
                    if node.up.anadromy == 1:
                        if (self.____transition_prob_anad[1][0] *
                                1000) > rand_num_1:
                            node.add_feature("anadromy", 0)
                        else:
                            node.add_feature("anadromy", 1)
                            anad_count += 1
                    else:
                        if (self.____transition_prob_anad[0][1] *
                                1000) < rand_num_1:
                            node.add_feature("anadromy", 0)
                        else:
                            node.add_feature("anadromy", 1)
                            anad_count += 1
                    if node.up.aqp3 == 1:
                        if (self.__transition_prob_aqp3[1][0] *
                                1000) > rand_num_2:
                            node.add_feature("aqp3", 0)
                        else:
                            node.add_feature("aqp3", 1)
                            aqp3_count += 1
                    else:
                        if (self.__transition_prob_aqp3[0][1] *
                                1000) < rand_num_2:
                            node.add_feature("aqp3", 0)
                        else:
                            node.add_feature("aqp3", 1)
                            aqp3_count += 1
                    #Tally nodes that ended up with both traits set to 1
                    if node.anadromy == 1 and node.aqp3 == 1:
                        anad_aqp3_count += 1
            #Calculate the effect size and store the results.
            eff_size = self.calc_effect_size(anad_count, aqp3_count,
                                             anad_aqp3_count)
            self.__sim_effect_sizes.append(eff_size)
            #Count simulations at least as extreme as the actual effect size
            if eff_size >= self.__effect_size:
                self.__p_value_count += 1
        self.__p_value = (self.__p_value_count / num_sims
                          )  #Calculate and store p-value

    #end monte_carlo_sim

    #--------------------------plot_histogram-----------------------------------
    # Description: Public method to plot the histogram for testing the null
    #              hypothesis.
    #---------------------------------------------------------------------------
    def plot_histogram(self):
        """Plot the simulated effect sizes with the actual one marked.

        Draws a 100-bin histogram of the stored simulation effect sizes, a
        dashed vertical line at the actual effect size and a text label next
        to that line, then shows the figure.
        """
        actual = self.__effect_size
        label = '   Actual Effect Size:{:.3f}'.format(actual)

        plt.style.use('seaborn')
        plt.hist(self.__sim_effect_sizes, bins=100)
        plt.axvline(actual, color='k', linestyle='dashed', linewidth=1)
        plt.text(actual + .05, 200, label)
        plt.xlabel('Effect Size')
        plt.ylabel('Effect Frequency')
        plt.title('Monte Carlo Simulation Distribution')
        plt.show()

    #end plot_histogram

    #--------------------__find_transition_prob---------------------------------
    # Description: Private method that determines the transition probability
    #              of each character trait change.
    #---------------------------------------------------------------------------
    def __find_transition_prob(self):
        #Establish counter variables and traverse tree
        zero_to_one_anad = zero_to_zero_anad = one_to_zero_anad = one_to_one_anad = 0.0
        zero_to_one_aqp3 = zero_to_zero_aqp3 = one_to_zero_aqp3 = one_to_one_aqp3 = 0.0
        for node in self.__tree.traverse("postorder"):
            if not node.is_root():
                #Find Anadromy transitions
                if (node.up.anadromy is 0 and node.anadromy is 0):
                    zero_to_zero_anad += 1
                elif (node.up.anadromy is 0 and node.anadromy is 1):
                    zero_to_one_anad += 1
                elif (node.up.anadromy is 1 and node.anadromy is 0):
                    one_to_zero_anad += 1
                else:
                    one_to_one_anad += 1
                #Find AQP3 transitions
                if (node.up.aqp3 is 0 and node.aqp3 is 0):
                    zero_to_zero_aqp3 += 1
                elif (node.up.aqp3 is 0 and node.aqp3 is 1):
                    zero_to_one_aqp3 += 1
                elif (node.up.aqp3 is 1 and node.aqp3 is 0):
                    one_to_zero_aqp3 += 1
                else:
                    one_to_one_aqp3 += 1

        #Insert the probability into the appropriate matrix
        self.____transition_prob_anad[0][0] = (zero_to_zero_anad /
                                               self.__num_of_branches)
        self.____transition_prob_anad[0][1] = (zero_to_one_anad /
                                               self.__num_of_branches)
        self.____transition_prob_anad[1][1] = (one_to_one_anad /
                                               self.__num_of_branches)
        self.____transition_prob_anad[1][0] = (one_to_zero_anad /
                                               self.__num_of_branches)

        self.__transition_prob_aqp3[0][0] = (zero_to_zero_aqp3 /
                                             self.__num_of_branches)
        self.__transition_prob_aqp3[0][1] = (zero_to_one_aqp3 /
                                             self.__num_of_branches)
        self.__transition_prob_aqp3[1][1] = (one_to_one_aqp3 /
                                             self.__num_of_branches)
        self.__transition_prob_aqp3[1][0] = (one_to_zero_aqp3 /
                                             self.__num_of_branches)

    #end findTransitionProb

#Private Methods
#---------------------------__down_pass-------------------------------------
# Description: Private method to perform down-pass to assign character state
#              to tips and internal nodes.
#---------------------------------------------------------------------------

    def __down_pass(self):
        """Assign character-state sets to tips and push them up to parents.

        Walks the tree in postorder. A tip whose name is a key of the lookup
        table receives one-element sets for "anadromy" and "aqp3", and its
        parent is tagged by renaming it to "Ancestor". Each child then merges
        its sets into the parent's: the intersection when the child's state(s)
        are compatible with the parent's, otherwise the union. Finally every
        node that is not tagged "Ancestor" is renamed to the common name stored
        in the lookup table.

        Raises:
            KeyError: if a node that is not tagged "Ancestor" has a name that
                is missing from the lookup table.
        """
        for node in self.__tree.traverse("postorder"):
            # Visited internal nodes are tagged with the name "Ancestor".
            # Names are compared with ==: `is` tests object identity and only
            # happens to work for strings when the interpreter interns them.
            if node.name == "Ancestor":
                if node.is_root():
                    continue
                parent = node.up
                # An unnamed parent has not been visited yet: seed it with
                # this node's states and tag it as visited.
                if parent.name == "":
                    parent.add_feature("anadromy", node.anadromy)
                    parent.add_feature("aqp3", node.aqp3)
                    parent.name = "Ancestor"
                for feature in ("aqp3", "anadromy"):
                    child_states = getattr(node, feature)
                    parent_states = getattr(parent, feature)
                    # One set containing the other means the states overlap:
                    # keep the intersection. Otherwise keep both (union).
                    if (child_states.issubset(parent_states)
                            or child_states.issuperset(parent_states)):
                        merged = parent_states.intersection(child_states)
                    else:
                        merged = parent_states.union(child_states)
                    parent.add_feature(feature, merged)
            else:
                # Either a terminal node or an internal node never tagged.
                if node.name in self.__anadromy_lookup:
                    record = self.__anadromy_lookup[node.name]
                    anad_state = record[self.ANAD_INDEX]
                    aqp3_state = record[self.AQP3_INDEX]
                    node.add_feature("anadromy", {anad_state})
                    node.add_feature("aqp3", {aqp3_state})

                    parent = node.up
                    # Unnamed parent == unvisited: seed it with the tip's
                    # states and tag it so later passes recognise it.
                    if parent.name == "":
                        parent.add_feature("anadromy", node.anadromy)
                        parent.add_feature("aqp3", node.aqp3)
                        parent.name = "Ancestor"

                    for feature, state in (("aqp3", aqp3_state),
                                           ("anadromy", anad_state)):
                        tip_states = getattr(node, feature)
                        parent_states = getattr(parent, feature)
                        if state in parent_states:
                            merged = tip_states.intersection(parent_states)
                        else:
                            merged = parent_states.union(tip_states)
                        parent.add_feature(feature, merged)
                # Runs for every untagged node, so a name absent from the
                # lookup table surfaces here as a KeyError.
                node.name = self.__anadromy_lookup[node.name][
                    self.COMMON_INDEX]

    #end __down_pass

    #----------------------------__up_pass--------------------------------------
    # Description: Private method to perform up-pass to clear any union in
    #              ancestor nodes by finding the intersection of the
    #              ancestor and its parent node.
    #---------------------------------------------------------------------------
    def __up_pass(self):
        """Resolve ambiguous (multi-state) sets on non-root ancestor nodes.

        Walks the tree in preorder. For every non-root node tagged "Ancestor",
        an "anadromy" or "aqp3" set holding more than one state is replaced by
        its intersection with the parent's set for the same feature.
        """
        for node in self.__tree.traverse("preorder"):
            # Compare the tag by value; `is` on a string literal checks object
            # identity and is only accidentally true under interning.
            if node.name != "Ancestor" or node.is_root():
                continue
            parent = node.up
            if len(node.anadromy) > 1:
                node.add_feature(
                    "anadromy", node.anadromy.intersection(parent.anadromy))
            if len(node.aqp3) > 1:
                node.add_feature("aqp3",
                                 node.aqp3.intersection(parent.aqp3))

    #end __up_pass

    #--------------------------__clean_tree-------------------------------------
    # Description: Private function to clear the sets in the attributes for
    #              anadromy and AQP3 in each node and turn them into integers.
    #---------------------------------------------------------------------------
    def __clean_tree(self):
        """Replace each node's state sets with a single element of each set.

        For every node (preorder), the first element yielded by iterating the
        "anadromy" and "aqp3" sets is stored back under the same feature name.
        """
        for current in self.__tree.traverse("preorder"):
            # Read both features before writing either one back.
            collapsed = {}
            for feature in ("anadromy", "aqp3"):
                collapsed[feature] = next(iter(getattr(current, feature)))
            for feature, state in collapsed.items():
                current.add_feature(feature, state)

    #end __clean_tree

    #-------------------------__find_char_states---------------------------------
    # Description: Private function to find the number of branches, as well as
    #              find the number of character states - both individual and
    #              branches with both anadromy and AQP3.
    #---------------------------------------------------------------------------
    def __find_char_states(self):
        """Count branches and how many nodes carry each character state.

        Every node visited adds one to the branch counter; nodes whose
        "anadromy" and/or "aqp3" equal 1 add to the matching counters, and
        nodes with both add to the combined counter. One is subtracted from
        the branch counter afterwards so the root is not counted.
        """
        for current in self.__tree.traverse("preorder"):
            self.__num_of_branches += 1
            is_anadromous = current.anadromy == 1
            has_aqp3 = current.aqp3 == 1
            if is_anadromous:
                self.__num_anad += 1
                if has_aqp3:
                    self.__num_anad_and_aqp3 += 1
            if has_aqp3:
                self.__num_aqp3 += 1
        # The root does not count as a separate branch.
        self.__num_of_branches -= 1
Example #24
0
import numpy as np
from setup_gloome_param_files import isIndDict, genotype_dict, geneDict, good_indices
import pandas as pd
from uniqify import uniqify
from unlistify import unlistify
from copy import copy
import pickle
import re

# Load the tree with internal nodes produced by gainLoss' ancestral
# reconstruction. Internal nodes are labeled 'Nx' where x is a number; the
# root is '[N1]' and the numbers increase from there.
ancTree_njs16 = Tree('full_proks_gainLoss_results/TheTree.INodes.ph', format=1)

# Print the tree as ASCII art, including the internal node names.
print(ancTree_njs16.get_ascii(show_internal=True))

# Look up the root node by its name and collect every node reached by
# traversing from it.
root = ancTree_njs16 & '[N1]'
nodes = list(root.traverse())

# Attach each organism's isIndDict value to the tree node of the same name,
# stored as the node feature 'isInd'.
for orgName in isIndDict:
    (ancTree_njs16 & orgName).add_feature('isInd', isIndDict[orgName])


# Writing the markNode recursive function.
def markNode(tree, node):
    children = node.children

    # Checking if all children marked.
Example #25
0
from ete3 import Tree
# Build a tree from a Newick string (parsed with format=1) in which several
# named nodes (B, C, F) each have a single child.
x = Tree("(((D:3,(E:3)F:2)C:2)B:4)A;", format=1)
# Print the tree as ASCII art, including the internal node names.
print(x.get_ascii(show_internal = True))
Example #26
0
def Tree_analysis(tree,tabla,out,analysis_type,out2):  
	"""Walk an OTU tree and report the nodes that qualify as a "core".

	Parameters:
		tree: passed straight to ``Tree(..., quoted_node_names=True, format=1)``.
		tabla: path of a tab-separated OTU table. Lines starting with '#' are
			copied to ``out2``; in every other line the first field is the OTU
			name, the fields between the first and the last are parsed as
			floats, and the last field is kept as text (taxonomy).
		out: path of the main report file (opened for writing).
		analysis_type: 1 (every entry of the node vector non-zero), 2 (binomial
			test), 3 (fraction of non-zero entries) or 4 (only dump the tree
			and the per-node vectors).
		out2: path of the per-core relative-abundance file (opened for writing).

	NOTE(review): neither output file is closed in the visible body, and the
	names ``np``, ``stats``, ``nodes_eval`` and ``erase_node`` must be provided
	by the surrounding module.
	"""

	# All of the following variables can be tuned.
	binomial_value = float(0.05) # Success probability used by the binomial test (method 2)
	p_value = float(0.05) # p-value threshold for the binomial method (method 2)
	percentage = float(0.9) # Minimum fraction of subjects required to define a core (method 3)
	taxo_p = float(0.9) # Minimum fraction of OTUs sharing a taxonomic group within one node; passed to nodes_eval
	
	output_file=open(out, 'w')
	output_file_2=open(out2, 'w')	

	tree = Tree(tree, quoted_node_names=True, format=1) # Load the tree (e.g. the 97_otus tree)
	table = {}
	cont = 1
	for line in open(tabla):
		if (line.startswith('#')):
			# Header/comment lines are copied verbatim to the second output file.
			output_file_2.write(str(line))
		else:
			fields = list(map(str.strip, line.split('\t'))) # OTU name -> numeric values (every column except the first and the last)
			table[fields[0]] = list(map(float, fields[1:-1]))
	
	table2 = {}
	
	for line in open(tabla):
		if (line.startswith('#')):
			continue
		else:
			fields2 = list(map(str.strip, line.split('\t'))) # OTU name -> one-element list holding the last column (taxonomy)
			table2[fields2[0]] = list(map(str, fields2[(len(fields2)-1):len(fields2)]))
	
	# Accumulator with one float slot per numeric column, sized from the last
	# data line that was read.
	table_final_res = [0] * len(fields[1:-1])
	table_final_res = ([float(i) for i in table_final_res])
	sum_abun_rela = 0
	cores = 0
	
	for leaf in tree:
		if leaf.name not in table:
			leaf.vector = None
		else:
			leaf.vector = table[leaf.name] # Give each tip present in the OTU table its vector of values

	node2content = tree.get_cached_content()

	flag=0
	for node in tree.traverse(): # Give every internal node the element-wise sum of the vectors of the leaves below it
		if not node.is_leaf():

			leaf_vectors = np.array([leaf.vector for leaf in node2content[node] if leaf.vector is not None])
			node.vector = leaf_vectors.sum(axis=0)
		
			# Remember the vector and the leaf names of the first internal node
			# visited (presumably the root -- verify against traverse() order).
			if(flag == 0):
				save_node1=node.vector
				total_saved_leaves = np.array([leaf.name for leaf in node2content[node]])

				flag=1
	
	if(analysis_type==4): # Informational mode: only print and write the tree and each node's vector
		print(tree.get_ascii(show_internal=True))
		output_file.write(tree.get_ascii(show_internal=True) + '\n' + '\n')
		for node in tree.traverse("preorder"):
			print (node.name, node.vector)
			output_file.write(node.name + '\t' + str(node.vector) + '\n')

	if(analysis_type!=4):
		# Column header of the main report.
		output_file.write("Core" + '\t' + "Prevalence" + '\t' + "Abundance" + '\t' + "Relative abundances" + '\t' + "Min" + '\t' + "Max" + '\t' + "Average" + '\t' + "SD" + '\t' + "Leaves" + '\t' + "Taxonomy" + '\t' + "Leaves number" + '\n') 
	
	
	if(analysis_type==1 or analysis_type==2 or analysis_type==3): # Evaluate every node in postorder with the chosen method: 100% core, binomial or percentage
		for node in tree.traverse("postorder"):
		
			tot_cont=np.count_nonzero(node.vector) # Number of subjects with at least one occurrence in this node's vector
			tot_cont2=np.asarray(node.vector).size # Total number of entries in the vector
			a=stats.binom_test(tot_cont, n=tot_cont2, p=binomial_value, alternative='greater') # One-sided binomial test using binomial_value
			rela=(tot_cont/tot_cont2)
			
			if(analysis_type==1 and np.all(node.vector) or (analysis_type==2 and a <= p_value) or (analysis_type==3 and rela >= percentage)): # The criterion applied depends on the selected method
				
				node.vector=([float(i) for i in node.vector]) # Convert every value of node.vector to float
				abundance=node.vector/save_node1 # Per-subject abundance of this node relative to the saved reference vector
				abundance =([float(i) for i in abundance]) 
				mean_abun=np.mean([float(i) for i in abundance]) # Mean relative abundance of the node
				std_abun=np.std([float(i) for i in abundance]) # Standard deviation of the relative abundance
				abundance_rela=sum(node.vector)/sum(save_node1) # Global relative abundance of the node over the reference vector
				table_final_res=list(map(sum, zip(table_final_res, abundance))) # Accumulate this node's abundances into the final result row
				sum_abun_rela=sum_abun_rela+abundance_rela # Running sum of the global relative abundances
				cores=cores+1 # Total number of cores found so far
				
				node2content = tree.get_cached_content()
				
				# One row per core in the second output: node name followed by
				# its per-subject relative abundances.
				output_file_2.write(str(node.name) + '\t')
				for x in range(len(abundance)): 
					output_file_2.write(str(abundance[x]) + '\t'),
				output_file_2.write('\n')
				
				output_file.write(node.name + '\t' +  str(rela) + '\t' + str(node.vector) + '\t' + str(abundance) + '\t' + str(min(abundance)) + '\t' + str(max(abundance)) + '\t' + str(mean_abun) + '\t' + str(std_abun) + '\t')
								
				conteo_hojas=nodes_eval(node,tree,output_file,table2,taxo_p,total_saved_leaves) # Delegates the taxonomy assignment for this node to nodes_eval; its return value is reported below as the leaf count
				
				output_file.write(str(conteo_hojas) + '\n') # Write the total number of leaves of this node
				
				tree=erase_node(node,tree) # Once a node has been evaluated it is erased from the tree to simplify the evaluation of the remaining nodes
			
				# Detach the first node that still carries this name.
				G = tree.search_nodes(name=node.name)[0]
				removed_node = G.detach()
						
		# Summary row: number of cores followed by statistics of the accumulated abundances.
		output_file.write(str(cores) + '\t' + '\t' + '\t' + str(table_final_res) + '\t' + str(min(table_final_res)) + '\t' + str(max(table_final_res)) + '\t' + str(np.mean([float(i) for i in table_final_res])) + '\t' + str(np.std([float(i) for i in table_final_res])) + '\n')
Example #27
0
    a.add_features(active=True)
    dictTree[nameNode] = a
    print(a)

# Merge example: create a new node named 'd' whose children are two subtrees
# already stored in dictTree.
nameNode = 'd'
dictPos[nameNode] = 1
#nameList = ['a','d']
noeud = Tree()
#a = t.add_child(name=nameNode)
noeud.add_child(dictTree[nameList[1]])
noeud.add_child(dictTree[nameList[2]])
noeud.add_features(name=nameNode)
# Register the merged node under its name; `test` and `noeud` refer to the
# same object, so both prints below show the same tree.
dictTree[nameNode] = noeud
test = dictTree[nameNode]
print(test.get_ascii(show_internal=True))
print(noeud.get_ascii(show_internal=True))

print(dictPos)
print(dictTree)
# NOTE(review): iterating an ete3 tree presumably yields only its leaves, in
# which case is_root() is true only for a single-node tree -- confirm.
for node in t1:
    if node.is_root():
        print("hello")
    #if not node.is_leaf():
    #innerbranch.append(node)
    #print (node)

#for leaf in t1:
#print (leaf.name)

#print(t1.get_tree_root())
Example #28
0
                "time":time.strftime("%H:%M:%S")}

    data[service]["user_input"] = userinput
    data[service]["run_info"] = run_info
    data[service]["results"] = summary_cont

    result_file = os.path.join(outdir, "data.json")
    with open(result_file, "w") as outfile:
        json.dump(data, outfile)

    # Create tree if neighbor parameter was sat
    if args.nj_path:
        # Check that more than 2 + header samples are included in the analysis
        if len(allel_output) > 3:
            python_path = sys.executable
            tree_script = os.path.join(os.path.dirname(__file__), "make_nj_tree.py")
            cmd = "{} {} -i {} -o {} -n {}".format(python_path, tree_script, allele_matrix, outdir, args.nj_path)
            proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = proc.communicate()
            out = out.decode("utf-8")
            err = err.decode("utf-8")

            if proc.returncode != 0:
                eprint("No neighbor joining tree was created. The neighbor program responded with this: {}".format(err))
            else:
                # print newick

                tr = Tree("{}/allele_tree.newick".format(outdir))

                print(tr.get_ascii())
Example #29
0
    
    else:  # for child
        #### search the parent node by parent_id
        node_cur = root.search_nodes(name=str(parent_id))
        # there should be only one parent node
        if len(node_cur) == 1:
            #### set child with its id
            node_cur = node_cur[0].add_child(name=str(cell_id))  
            #### set duration
            node_cur.add_feature("dist", time_duration)
            
            # set node style
            node_cur.set_style(ns)
            
            # set node name to face
            nameFace = TextFace(node_cur.name)
            nameFace.fgcolor = "white"
            nameFace.fsize = 15
            nameFace.background.color = "green"
            node_cur.add_face(nameFace, column=1, position="branch-bottom")
        else:
            raise RuntimeError("the cell id should be unique!")
            
        
#node = root.search_nodes(name=str(5))
#node[0].add_feature("dist", 1.5)
    
print root.get_ascii()

root.show(tree_style=ts)
Example #30
0
def run(args):
    """Print, render or display every tree yielded by ``args.src_tree_iterator``.

    In text mode (``args.text_mode``) each tree is loaded as an ``ete3.Tree``
    and printed as ASCII art, after which the function returns. Otherwise each
    tree is loaded as a ``PhyloTree``, decorated with the faces described by
    ``args.face`` plus the presets enabled by ``args.as_ncbi``, ``args.alg``,
    ``args.heatmap`` and ``args.bubbles``, and then either rendered to
    ``t<index>.<args.image>`` or shown with ``t.show``.

    Side effects: rebinds the module-level ``FACES`` list, reads the tree and
    heatmap files, and replaces falsy ``args.height`` / ``args.width`` with
    ``None``.

    Args:
        args: option namespace; the attributes read are those referenced in
            the body (``text_mode``, ``raxml``, ``face``, ``mode``, ...).
    """
    # ``re`` must be imported before the text-mode branch: any ``import re``
    # inside the function makes ``re`` a local name for the whole function, so
    # importing it after the branch raised UnboundLocalError for
    # text mode + RAxML input.
    import re

    # RAxML writes support values as ":<length>[<support>]"; they are rewritten
    # as NHX annotations. Raw strings avoid invalid escape sequences.
    raxml_support = re.compile(r":(\d+\.\d+)\[(\d+)\]")
    auto_color = re.compile(r"auto\(([^)]*)\)")

    def read_raxml_newick(path):
        # Return the file content with RAxML supports converted to NHX.
        with open(path) as handle:
            return raxml_support.sub(r":\1[&&NHX:support=\2]", handle.read())

    if args.text_mode:
        from ete3 import Tree
        for tindex, tfile in enumerate(args.src_tree_iterator):
            if args.raxml:
                t = Tree(read_raxml_newick(tfile))
            else:
                t = Tree(tfile)

            print(
                t.get_ascii(show_internal=args.show_internal_names,
                            attributes=args.show_attributes))
        return

    # Graphical dependencies are imported only when they are needed.
    import random
    import colorsys
    from collections import defaultdict
    from ete3 import (Tree, PhyloTree, TextFace, RectFace, faces, TreeStyle,
                      add_face_to_node, random_color)

    global FACES

    if args.face:
        FACES = parse_faces(args.face)
    else:
        FACES = []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    # A user face already showing @name replaces the default leaf-name face.
    for f in FACES:
        if f["value"] == "@name":
            ts.show_leaf_name = False
            break

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(
            parse_faces([
                'value:@sci_name, size:10, fstyle:italic',
                'value:@taxid, color:grey, size:6, format:" - %s"',
                'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
                'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
            ]))

    if args.alg:
        FACES.extend(
            parse_faces([
                'value:@sequence, size:10, pos:aligned, ftype:%s' %
                args.alg_type
            ]))

    if args.heatmap:
        FACES.extend(
            parse_faces(['value:@name, size:10, pos:aligned, ftype:heatmap']))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(
                parse_faces([
                    'value:@%s, pos:float, ftype:bubble, opacity:0.4' % bubble,
                ]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    ts.layout_fn = lambda x: None

    for tindex, tfile in enumerate(args.src_tree_iterator):
        if args.raxml:
            t = PhyloTree(read_raxml_newick(tfile))
        else:
            t = PhyloTree(tfile)

        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        if args.heatmap:
            DEFAULT_COLOR_SATURATION = 0.3
            BASE_LIGHTNESS = 0.7

            def gradient_color(value, max_value, saturation=0.5, hue=0.1):
                def rgb2hex(rgb):
                    return '#%02x%02x%02x' % rgb

                def hls2hex(h, l, s):
                    return rgb2hex(
                        tuple([
                            int(x * 255) for x in colorsys.hls_to_rgb(h, l, s)
                        ]))

                lightness = 1 - (value * BASE_LIGHTNESS) / max_value
                return hls2hex(hue, lightness, DEFAULT_COLOR_SATURATION)

            heatmap_center_value = 0
            heatmap_color_center = "white"
            heatmap_color_up = 0.3
            heatmap_color_down = 0.7
            heatmap_color_missing = "black"

            heatmap_data = {}
            max_value, min_value = None, None
            with open(args.heatmap) as heatmap_file:
                for line in heatmap_file:
                    # Skip header/comment lines and blank lines.
                    if line.startswith('#') or not line.strip():
                        continue
                    fields = line.split('\t')
                    name = fields[0].strip()

                    # Empty cells are kept as None (drawn as "missing").
                    values = [
                        float(x) if x.strip() else None for x in fields[1:]
                    ]
                    heatmap_data[name] = values

                    # None cannot be ordered against floats, so the range is
                    # tracked over the cells that hold a number.
                    known = [v for v in values if v is not None]
                    if not known:
                        continue
                    maxv = max(known)
                    minv = min(known)
                    if max_value is None or maxv > max_value:
                        max_value = maxv
                    if min_value is None or minv < min_value:
                        min_value = minv

            if max_value is None:
                # No numeric cell at all: nothing will be colour-graded, so
                # any finite range works.
                max_value = min_value = heatmap_center_value

            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)

            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)

        # scale the tree
        if not args.height:
            args.height = None
        if not args.width:
            args.width = None

        f2color = {}
        f2last_seed = {}

        def resolve_auto_color(color_spec, node, face_value):
            # Turn an "auto(<attr>)" colour spec into a colour shared by all
            # nodes with the same value of <attr> (or of the face's own
            # attribute when <attr> is empty); other specs pass through.
            auto_m = auto_color.search(color_spec)
            if not auto_m:
                return color_spec
            target_attr = auto_m.groups()[0].strip()
            color_keyattr = (target_attr or face_value).lstrip('@')
            color_bin = getattr(node, color_keyattr, None)

            last_seed = f2last_seed.setdefault(color_keyattr, random.random())
            seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
            f2last_seed[color_keyattr] = seed

            return f2color.setdefault(color_bin, random_color(h=seed))

        for node in t.traverse():
            node.img_style['size'] = 0
            # Highlight nodes that have exactly one child.
            if len(node.children) == 1:
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            # Next free column for each face position on this node.
            ftype_pos = defaultdict(int)

            for findex, f in enumerate(FACES):
                if (f['nodetype'] == 'any'
                        or (f['nodetype'] == 'leaf' and node.is_leaf()) or
                    (f['nodetype'] == 'internal' and not node.is_leaf())):

                    # if node passes face filters
                    if node_matcher(node, f["filters"]):
                        if f["value"].startswith("@"):
                            fvalue = getattr(node, f["value"][1:], None)
                        else:
                            fvalue = f["value"]

                        # if node's attribute has content, generate face
                        if fvalue is not None:
                            fsize = f["size"]
                            fbgcolor = f["bgcolor"]
                            fcolor = f['color']

                            # Parse color options
                            if fcolor:
                                fcolor = resolve_auto_color(
                                    fcolor, node, f["value"])

                            if fbgcolor:
                                fbgcolor = resolve_auto_color(
                                    fbgcolor, node, f["value"])

                            # Reset for every face so an unrecognised ftype
                            # cannot re-add the face built for the previous one.
                            F = None

                            if f["ftype"] == "text":
                                if f.get("format", None):
                                    fvalue = f["format"] % fvalue

                                F = TextFace(fvalue,
                                             fsize=fsize,
                                             fgcolor=fcolor or "black",
                                             fstyle=f.get('fstyle', None))

                            elif f["ftype"] == "fullseq":
                                F = faces.SeqMotifFace(seq=fvalue,
                                                       seq_format="seq",
                                                       seqtail_format="seq",
                                                       height=fsize)
                            elif f["ftype"] == "compactseq":
                                F = faces.SeqMotifFace(
                                    seq=fvalue,
                                    seq_format="compactseq",
                                    seqtail_format="compactseq",
                                    height=fsize)
                            elif f["ftype"] == "blockseq":
                                F = faces.SeqMotifFace(
                                    seq=fvalue,
                                    seq_format="blockseq",
                                    seqtail_format="blockseq",
                                    height=fsize,
                                    fgcolor=fcolor or "slategrey",
                                    bgcolor=fbgcolor or "slategrey",
                                    scale_factor=1.0)
                                # The background is already applied by the
                                # motif face itself.
                                fbgcolor = None
                            elif f["ftype"] == "bubble":
                                # Numeric values scale the radius; anything
                                # else falls back to the plain face size.
                                try:
                                    v = float(fvalue)
                                except ValueError:
                                    rad = fsize
                                else:
                                    rad = fsize * v
                                F = faces.CircleFace(radius=rad,
                                                     style="sphere",
                                                     color=fcolor
                                                     or "steelblue")

                            elif f["ftype"] == "heatmap":
                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                else:
                                    col = f["column"]

                                # One coloured cell per heatmap column, added
                                # directly to the node.
                                for i, value in enumerate(
                                        heatmap_data.get(node.name, [])):
                                    ftype_pos[f["pos"]] += 1

                                    if value is None:
                                        color = heatmap_color_missing
                                    elif value > heatmap_center_value:
                                        color = gradient_color(
                                            abs(heatmap_center_value - value),
                                            heatmap_max_value,
                                            hue=heatmap_color_up)
                                    elif value < heatmap_center_value:
                                        color = gradient_color(
                                            abs(heatmap_center_value - value),
                                            heatmap_max_value,
                                            hue=heatmap_color_down)
                                    else:
                                        color = heatmap_color_center
                                    node.add_face(RectFace(
                                        20, 20, color, color),
                                                  position="aligned",
                                                  column=col + i)
                                F = None

                            elif f["ftype"] == "profile":
                                # Not implemented: no face is produced.
                                F = None
                            elif f["ftype"] == "barchart":
                                F = None
                            elif f["ftype"] == "piechart":
                                F = None

                            # Add the Face
                            if F:
                                F.opacity = f['opacity'] or 1.0

                                # Set face general attributes
                                if fbgcolor:
                                    F.background.color = fbgcolor

                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                    ftype_pos[f["pos"]] += 1
                                else:
                                    col = f["column"]
                                node.add_face(F, column=col, position=f["pos"])

        if args.image:
            t.render("t%d.%s" % (tindex, args.image),
                     tree_style=ts,
                     w=args.width,
                     h=args.height,
                     units=args.size_units)
        else:
            t.show(None, tree_style=ts)
Example #31
0
from ete3 import Tree

# Build a tree from a Newick string whose internal nodes are named
# (parsed with format=1).
t = Tree('((((H,K)D,(F,I)G)B,E)A,((L,(N,Q)O)J,(P,S)M)C)X;', format=1)
# Print the tree as ASCII art, including the internal node names.
print t.get_ascii(show_internal=True)

#print rooted_tree
Example #32
0
#%%
# Loads a tree with internal node names
t = Tree("(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;", format=1)
#%%
#adding features (e.g. percentages)

from ete3 import Tree
t =  Tree("((((((4, e), i), o),h), u), ((3, 4), (i, june)));")
# we annotate the tree using external data
colors = {"a":"red", "e":"green", "i":"yellow",
          "o":"black", "u":"purple", "4":"green",
          "3":"yellow", "1":"white", "5":"red",
          "june":"yellow"}
for leaf in t:
    leaf.add_features(color=colors.get(leaf.name, "none"))
print(t.get_ascii(attributes=["name", "color"], show_internal=False))


print("Green-yellow clusters:")
# And obtain clusters exclusively green and yellow
for node in t.get_monophyletic(values=["green", "yellow"], target_attr="color"):
   print(node.get_ascii(attributes=["color", "name"], show_internal=False))
   
   
#%%
   
#finding and saving nodes by their names 
   
C= t&"C"
H= t&"H"
I= t&"I"