Esempio n. 1
0
    def test_rnni(self):
        """Round-trip test for rNNI / rSPR moves followed by ``revert_topology_move``.

        For every non-terminal node whose ``binary`` code is not "1", and for
        each child index 0 and 1, an rNNI move is applied to the tree ``x``.
        On the resulting tree, every admissible target is then tried as an
        rSPR destination and immediately reverted; the Robinson-Foulds
        distance to the pre-rSPR tree must be 0 and the incident lengths must
        match. Finally the rNNI move itself is reverted and the tree is
        compared with the starting tree (RF distance, tree length, incident
        lengths).

        NOTE(review): ``x`` and the taxa ``a`` .. ``l`` are not defined in
        this block; they are presumably module-level fixtures - confirm.
        """

        x.random_tree([a, b, c, d, e, f, g, h, i, j, k, l])


        # Snapshot of the starting tree: newick, ete2 copy, total length and
        # per-node incident lengths keyed by node name.
        s = x.newick
        etetree1 = ete2.Tree(s, format=1)
        tl = x.tree_length
        pren = {no.name: no.incident_length for no in x.all_nodes}
        for origin in x.all_nodes:
            if origin.binary != "1" and origin.terminal is False:
                for child in [0,1]:
                    # Topology (left/right child names per node) and root name
                    # before the rNNI move, needed to revert it later.
                    pre_top2 = {no.name: (no.left.name, no.right.name) if no.terminal is False else (None,None) for no in x.all_nodes}
                    old_root = x.root.name
                    # NOTE(review): if this method lives inside a class body,
                    # Python name-mangles ``__rNNI`` to ``_<Class>__rNNI`` -
                    # confirm the attribute actually resolves on ``x``.
                    x.__rNNI(origin, child)
                    x.set_binary()

                    for target in x.all_nodes:

                        # The first four branches skip targets that are not
                        # used as rSPR destinations: nodes whose binary code
                        # starts with origin's code, origin's mother, and
                        # nodes sharing origin's mother.
                        if target.binary.startswith(origin.binary):
                            pass

                        # NOTE(review): origin.binary was != "1" before the
                        # move; this branch can only fire if set_binary()
                        # changed it - confirm.
                        elif origin.binary == "1":
                            pass
                        elif origin.mother == target:
                            pass
                        elif target.mother is not None and target.mother == origin.mother:
                            pass

                        else:
                            # Snapshot the post-rNNI tree, apply rSPR, revert.
                            s3 = x.newick
                            pren2 = {no.name: no.incident_length for no in x.all_nodes}
                            pre_top = {no.name: (no.left.name, no.right.name) if no.terminal is False else (None,None) for no in x.all_nodes}
                            old_root_2 = x.root.name
                            pre = x.__rSPR(origin, target)
                            etetree3 = ete2.Tree(s3, format=1)
                            x.set_binary()

                            # NOTE(review): the lengths passed here are
                            # ``pren`` (taken before the rNNI move), while the
                            # assertion below compares against ``pren2``
                            # (taken after it) - confirm this is intended.
                            x.revert_topology_move(pre_top, pren, old_root_2)

                            aftn2 = {no.name: no.incident_length for no in x.all_nodes}
                            s2 = x.newick
                            etetree2 = ete2.Tree(s2, format=1)

                            # RF distance between reverted and pre-rSPR tree.
                            rf = etetree2.robinson_foulds(etetree3)[0]

                            self.assertEqual(rf, 0, "failed for " + origin.name + " and " + target.name)
                            self.assertEqual(pren2, aftn2)

                    # Undo the rNNI move and compare with the starting tree.
                    x.revert_topology_move(pre_top2, pren, old_root)
                    tl2 = x.tree_length
                    aftn = {no.name: no.incident_length for no in x.all_nodes}
                    s2 = x.newick
                    etetree2 = ete2.Tree(s2, format=1)

                    rf = etetree1.robinson_foulds(etetree2)[0]
                    # self.assertEqual(s, s2)
                    self.assertEqual(rf, 0)
                    self.assertEqual(tl, tl2)
                    self.assertEqual(pren, aftn)
Esempio n. 2
0
def get_tree(tree_string):
    """Parse ``tree_string`` into an ``ete2.Tree``; return ``None`` on failure.

    Strings containing "[&" but not "&&NHX" are first rewritten with the
    module-level ``regex1`` (and, if "NHX" is still absent afterwards,
    ``regex2``) using ``repl``. Parsing is then attempted with ete2's default
    settings and, failing that, with ``format=1``.
    """
    # FIXME: make this more elegant; once a parse succeeds, remember the
    # strategy instead of brute-forcing every subsequent tree.

    has_plain_annotations = "[&" in tree_string and "&&NHX" not in tree_string
    if has_plain_annotations:
        tree_string = regex1.sub(repl, tree_string)
        if "NHX" not in tree_string:
            tree_string = regex2.sub(repl, tree_string)

    # First attempt: parse as is. Second attempt: internal node labels.
    for parse_options in ({}, {"format": 1}):
        try:
            return ete2.Tree(tree_string, **parse_options)
        except (ValueError, ete2.parser.newick.NewickError):
            continue

    # Neither strategy worked. Give up.
    return None
    def test_CallTreeMethod(self):
        """Exercise ``ProgramCaller.CallTreeMethod`` with "fasttree" and "iqtree".

        The fasttree call must honour the proposed output filename and produce
        the exact expected newick. The iqtree call derives its own output
        filename from the unique ID; its tree is compared branch by branch
        with a 30% relative tolerance on leaf branch lengths.
        """
        c = pc.ProgramCaller(config_alt)
        # Write an input file
        infn = output_dir + "Test.fa"
        with open(infn, 'wb') as outfile:
            outfile.write(">a1\nST\n>b2\nKL\n>c3\nSL\n>d4\nKT")

        # test a method that uses the proposed output filename
        exp_outfn = output_dir + "OG234.tre"
        if os.path.exists(exp_outfn):
            os.remove(exp_outfn)
        outfn = c.CallTreeMethod("fasttree", infn, exp_outfn, "OGabc_id")
        # assertIsNotNone reports the offending value on failure and avoids
        # the `!= None` comparison.
        self.assertIsNotNone(outfn)
        self.assertEqual(outfn, exp_outfn)
        expectedTree = "(a1:1.10312,c3:0.00055,(b2:0.00055,d4:1.46340)0.761:1.41871);"
        with open(outfn, 'rb') as infile:
            tree = infile.read().rstrip()
        self.assertEqual(expectedTree, tree)

        # test a method that generates its own output filename based on unique ID
        exp_outfn = output_dir + "OG234_id.treefile"
        if os.path.exists(exp_outfn):
            os.remove(exp_outfn)
        outfn = c.CallTreeMethod("iqtree", infn, output_dir + "OG234.tre",
                                 "OG234_id")
        self.assertEqual(outfn, exp_outfn)
        expectedTree = ete2.Tree(
            "(a1:0.7475350209,(b2:0.0000026604,c3:1.2318876921):1.279964,d4:0.0000022111);"
        )
        actualTree = ete2.Tree(outfn)
        # Iterating an ete2 tree visits its leaves; look each one up by name
        # once and reuse the distance in both the check and the message.
        for n in expectedTree:
            actual_dist = (actualTree & n.name).dist
            self.assertLess(
                abs(actual_dist - n.dist) / n.dist, 0.3,
                (n.dist, actual_dist))
Esempio n. 4
0
def run():
    """Print one ``TREE tree_<n> = <newick>`` line per input tree.

    Trees are read one per line from the files given on the command line (or
    stdin, via ``fileinput``). The sorted leaf names of the first tree are
    passed to ``print_middle`` and define a 1-based numbering; in every tree,
    leaves whose name is in that list are renamed to their number before the
    tree is written. ``print_footer`` is called once at the end.

    Returns:
        0 always.
    """
    # Parse options
    parser = optparse.OptionParser(__doc__)
    options, files = parser.parse_args()

    # Maps a leaf name to its 1-based position (as a string) in the sorted
    # names of the first tree. Built once, so renaming a leaf is a dict
    # lookup instead of a linear `list.index` scan per leaf.
    number_of = None
    for count, line in enumerate(fileinput.input(files), 1):
        t = ete2.Tree(line)
        leaves = t.get_leaves()
        # If first tree, get names
        if number_of is None:
            names = sorted([l.name for l in leaves])
            print_middle(names)
            number_of = {}
            for position, name in enumerate(names, 1):
                # setdefault keeps the FIRST position for duplicated names,
                # matching what names.index() returned.
                number_of.setdefault(name, str(position))
        for l in leaves:
            if l.name in number_of:
                l.name = number_of[l.name]

        # Print tree line (single-argument form works on Python 2 and 3)
        print("TREE tree_%d = %s" % (count, t.write()))

    print_footer()

    # Done
    return 0
Esempio n. 5
0
def drawtrees(error, label):
    """Render ml/mp/nj/ftree ``.tree`` files in the working directory to PNG.

    Args:
        error: object with a ``write`` method; receives error messages.
        label: passed through to ``addtext`` for progress messages.

    Returns:
        ``error`` if a file could not be opened (processing stops there),
        otherwise ``None``. A file that cannot be parsed is reported on
        ``error`` and skipped.
    """
    # Generate graphical representation of all trees.
    addtext(label, "Drawing trees")
    # Import tree drawing module
    import ete2
    start = time.time()
    # Trees to draw
    files = ["ml.tree", "mp.tree", "nj.tree", "ftree.tree"]
    suffix = ".tree"
    # Loop over filenames and render image
    for tfile in files:
        try:
            # `with` closes the handle even if parsing fails.
            with open(tfile, 'r') as handl:
                try:
                    tree = ete2.Tree(handl.read())
                except Exception:
                    error.write(tfile + " corrupted")
                    # Skip rendering: there is no valid tree for this file
                    # (previously a stale or undefined `tree` was rendered).
                    continue
            # Strip only the trailing extension. The old re.sub('.tree', '')
            # treated '.' as a wildcard and reduced "ftree.tree" to "".
            name = tfile[:-len(suffix)] if tfile.endswith(suffix) else tfile
            tree.render(name + '.png')
        except IOError:
            error.write("File: " + tfile + " not found")
            return error
    end = time.time()
    total = end - start
    text = "All trees drawn in " + str(total) + " seconds"
    addtext(label, text)
    return None
Esempio n. 6
0
    def random_topology(
        self,
        nspecies,
        names=None,
        rooted=False,
        ):
        """
        Use ete2 to make a random topology with ``nspecies`` leaves.

        ``names``, when given, is shuffled (on a private copy, so the caller's
        sequence is left untouched) and handed to ete2 as the leaf name
        library. The tree is rooted on its first child when ``rooted`` is
        true and unrooted otherwise. Its newick string, with every ")1"
        rewritten to ")", is wrapped in the project ``Tree`` class and passed
        through ``pam2sps('strip')``, whose result is returned.

        NOTE(review): branch lengths are whatever ete2's ``populate`` assigns;
        this block does not draw them from a gamma distribution itself -
        confirm against ``Tree.pam2sps``.
        """

        if names:
            # Copy before shuffling so the caller's list is not reordered.
            names = list(names)
            random.shuffle(names)
        t = ete2.Tree()
        t.populate(nspecies, names_library=names)
        if rooted:
            t.set_outgroup(t.children[0])
        else:
            t.unroot()

        t_as_newick = t.write()
        t_as_newick = t_as_newick.replace(')1', ')')
        return Tree(t_as_newick, name='random tree').pam2sps('strip')
Esempio n. 7
0
def check_supersets(tree):
    """Re-parent children of ``tree`` according to their ``mutations`` sets.

    For every ordered pair of distinct children (c1, c2):
      * if one child's mutations are a subset of the other's, the subset
        child is detached and added under the other child;
      * if the two mutation sets intersect at all, both are detached and
        placed under a new intermediate node carrying the shared mutations,
        which is added to ``tree``;
      * ``check_supersets`` is then called recursively on c1.

    Returns:
        False for a leaf; otherwise True if any subset re-parenting happened
        here or any recursive call on a c1 returned True. Creating an
        intermediate node does not by itself set the flag.

    NOTE(review): ``tree.children`` is modified (detach/add_child) while both
    loops iterate over it, and the overlap step runs even right after a
    subset move of the same pair - confirm this is the intended behaviour.
    """
    if tree.is_leaf():
        return False

    moved = False
    for c1 in tree.children:
        for c2 in tree.children:
            # Skip pairing a child with itself.
            if c1 == c2:
                continue
            # Subset relation: push the subset node below the superset node.
            if c1.mutations.issubset(c2.mutations):
                c1.detach()
                c2.add_child(c1)
                moved = True
            elif c2.mutations.issubset(c1.mutations):
                c2.detach()
                c1.add_child(c2)
                moved = True

            # Partial overlap: group both children under a new node that
            # holds the mutations they share.
            overlap = c1.mutations.intersection(c2.mutations)
            if len(overlap) > 0:
                c1.detach()
                c2.detach()
                intermediate = instantiate_node(ete2.Tree(name='NoName'))
                intermediate.mutations = overlap
                intermediate.add_child(c1)
                intermediate.add_child(c2)
                tree.add_child(intermediate)
            # `or` short-circuits: once moved is True, c1 is no longer
            # recursed into from this point on.
            moved = moved or check_supersets(c1)

    return moved
Esempio n. 8
0
def populate_tree(session, newick, germline_seq, removed_muts):
    """Build an ``ete2.Tree`` from ``newick`` and annotate its sequence nodes.

    Nodes named 'NoName', 'germline' or '' are handed to ``instantiate_node``.
    Every other node name is looked up as ``Sequence.ai``; the node is renamed
    to that sequence's ``seq_id`` and receives three features: ``seq_ids``
    (metadata for each sequence collapsed to it), ``copy_number`` (sum of the
    collapsed copy numbers) and ``mutations`` (from ``get_mutations``).

    Returns:
        The annotated tree.
    """
    placeholder_names = ('NoName', 'germline', '')
    tree = ete2.Tree(newick)

    for node in tree.traverse():
        if node.name in placeholder_names:
            # Only the call matters here; the loop variable is not reused.
            instantiate_node(node)
            continue

        seq = session.query(Sequence).filter(
            Sequence.ai == node.name).first()

        # One metadata record per sequence collapsed to `seq`.
        seq_ids = {
            collapsed_seq.seq_id: {
                'ai': collapsed_seq.ai,
                'tissue': collapsed_seq.sample.tissue,
                'subset': collapsed_seq.sample.subset,
                'ig_class': collapsed_seq.sample.ig_class,
                'copy_number': collapsed_seq.copy_number,
                'sample_name': collapsed_seq.sample.name,
                'sample_id': collapsed_seq.sample.id
            }
            for collapsed_seq in get_seqs_collapsed_to(session, seq)
        }

        node.name = seq.seq_id
        node.add_feature('seq_ids', seq_ids)

        total_copies = sum(info['copy_number'] for info in seq_ids.values())
        node.add_feature('copy_number', total_copies)

        modified_seq = remove_muts(seq.sequence, removed_muts, germline_seq)
        clone_positions = map(
            int, json.loads(seq.mutations_from_clone).keys())
        node.add_feature(
            'mutations',
            get_mutations(germline_seq, modified_seq, clone_positions))

    return tree
Esempio n. 9
0
def import_phylo(phylo_list, biodb):
    """Store each phylogeny in ``biosqldb_phylogenies.BBH_<biodb>``.

    The table is created if missing. For every newick in ``phylo_list`` the
    orthogroup is taken from the first leaf whose name is a key of the
    locus_tag -> orthogroup mapping, and the re-serialised tree is inserted
    under that orthogroup. Phylogenies with no known leaf, or whose insert
    fails, are printed and skipped. The transaction is committed at the end.

    Args:
        phylo_list: iterable (with ``len``) of newick strings.
        biodb: database name, used for the lookup and in the table name.
    """
    from chlamdb.biosqldb import manipulate_biosqldb
    import biosql_own_sql_tables
    import ete2
    server, db = manipulate_biosqldb.load_db(biodb)

    sql = 'create table IF NOT EXISTS biosqldb_phylogenies.BBH_%s (orthogroup varchar(100), phylogeny text, INDEX orthogroup (orthogroup));' % biodb
    server.adaptor.execute(sql, )

    locuslag2orthogroup = biosql_own_sql_tables.locus_tag2orthogroup(biodb)
    l = len(phylo_list)
    for n, phylo in enumerate(phylo_list):
        print("%s/%s" % (n, l))
        t = ete2.Tree(phylo, format=0)

        # Reset per tree: previously a tree with no known leaf silently
        # reused the orthogroup of the preceding tree (or raised NameError
        # on the first one).
        orthogroup = None
        for leave in t.iter_leaves():
            try:
                orthogroup = locuslag2orthogroup[leave.name]
                break
            except KeyError:
                continue
        if orthogroup is None:
            # Same reporting channel as a failed insert below.
            print(phylo)
            continue

        # NOTE(review): values are interpolated into the SQL text; a newick
        # or orthogroup containing a double quote will break the statement.
        # Consider passing them as query parameters if the adaptor allows.
        sql = 'insert into biosqldb_phylogenies.BBH_%s values ("%s", "%s");' % (
            biodb, orthogroup, t.write())
        try:
            server.adaptor.execute(sql, )
        except Exception:
            print(phylo)
    server.commit()
Esempio n. 10
0
def build_tree_from_dict(dict_tree, tree=None):
    """Attach a nested ``{name: {child_name: {...}}}`` mapping below ``tree``.

    Args:
        dict_tree: mapping from node name to a mapping of its children
            (empty or falsy for a leaf).
        tree: node to attach to; must provide ``add_child(name=...)`` that
            returns the new child. A fresh ``ete2.Tree`` named "root" is
            created when omitted.

    Returns:
        ``tree`` (the node the mapping was attached to).
    """
    if tree is None:
        tree = ete2.Tree(name="root")
    # .items() works on Python 2 and 3 alike (iteritems is Python 2 only).
    for parent, children in dict_tree.items():
        subtree = tree.add_child(name=parent)
        if children:
            build_tree_from_dict(children, subtree)
    return tree
Esempio n. 11
0
	def visualize(self,savepath='tree.txt',write_perm='False'):
		"""Print the tree as ASCII art and optionally save that text to a file.

		The newick string from ``make_newick(self)`` is parsed with ete2
		(format=1), stored on ``self.newick`` and printed.

		Args:
			savepath: file that receives ``str(self.newick)`` when writing
				is enabled.
			write_perm: enables writing when truthy. The historical string
				default 'False' is treated as "do not write"; it used to be
				truthy, so the file was always written.
		"""
		newick=make_newick(self)+';'
		self.newick=ete2.Tree(newick,format=1)
		print(self.newick)
		# The literal string 'False' must not count as permission to write.
		should_write = bool(write_perm) and write_perm != 'False'
		if should_write:
			with open(savepath,'w') as f:
				f.write(str(self.newick))
Esempio n. 12
0
    def consume(self, stream):
        """Yield an ``ete2.Tree`` for each string in ``stream`` that parses.

        Each string is tried with ete2's default settings first and with
        ``format=1`` (internal node labels) second; strings that fail both
        attempts are dropped silently.
        """
        parse_attempts = ({}, {"format": 1})
        for tree_string in stream:
            for options in parse_attempts:
                try:
                    yield ete2.Tree(tree_string, **options)
                    # Parsed and delivered: move on to the next string.
                    break
                except (ValueError, ete2.parser.newick.NewickError):
                    # Try the next strategy, or give up after the last one.
                    continue
Esempio n. 13
0
def run():
    """Plot or save the trees read from the input files.

    One tree is read per input line. With ``--attribute`` every leaf gets a
    coloured circle face chosen by that attribute's value; every leaf always
    gets a text face showing the ``--label`` attribute. With ``--output`` the
    tree is rendered to file (numbered per tree under ``--multiple``),
    otherwise it is shown interactively. Without ``--multiple`` only the
    first tree is processed.

    Returns:
        0 always.
    """
    # Parse options
    option_table = (
        (('-a', '--attribute'), dict(dest="attribute", default=None)),
        (('-d', '--dpi'), dict(type="int", default=None)),
        (('-H', '--height'), dict(type="int", dest="h", default=None)),
        (('-l', '--label'), dict(default="name")),
        (('-m', '--multiple'), dict(default=False, action="store_true")),
        (('-o', '--output'), dict(default=None)),
        (('-u', '--units'), dict(default="px")),
        (('-w', '--width'), dict(type="int", dest="w", default=None)),
    )
    parser = optparse.OptionParser(__doc__)
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    options, files = parser.parse_args()

    # Setup TreeStyle
    ts = ete2.TreeStyle()
    ts.show_scale = False
    ts.show_branch_support = True

    # Read trees
    for n, line in enumerate(fileinput.input(files)):
        t = ete2.Tree(line)

        # Add faces: a coloured circle per attribute value, then a text label
        if options.attribute:
            values = set(
                getattr(leaf, options.attribute) for leaf in t.get_leaves())
            colour_map = dict(zip(values, get_colour_set(len(values))))
            for leaf in t.iter_leaves():
                circle = ete2.CircleFace(
                    radius=10,
                    color=colour_map[getattr(leaf, options.attribute)],
                    style="sphere")
                leaf.add_face(circle, 0)
        for leaf in t.iter_leaves():
            leaf.add_face(ete2.TextFace(getattr(leaf, options.label)), 1)

        # Plot or save
        if not options.output:
            t.show(ultrametric, tree_style=ts)
        else:
            # Size-related keywords are only forwarded when a height or a
            # width was requested, and only those that are set.
            kw = {}
            if options.h or options.w:
                kw = dict(
                    (o, getattr(options, o))
                    for o in ("h", "w", "units", "dpi")
                    if getattr(options, o))
            filename = options.output
            if options.multiple:
                base, ext = os.path.splitext(options.output)
                filename = base + ("_%06d" % (n + 1)) + ext
            t.render(filename, ultrametric, tree_style=ts, **kw)

        if not options.multiple:
            return 0

    return 0
def SupportedHierachies_wrapper(treeName, GeneToSpecies, species, dict_clades, clade_names):
    """Run ``SupportedHierachies`` on the tree file ``treeName``.

    Returns an empty list when the file does not exist or when the genes in
    the tree map to fewer than four distinct species; otherwise returns
    whatever ``SupportedHierachies`` returns.
    """
    if os.path.exists(treeName):
        t = ete2.Tree(treeName, format=1)
        G = set(t.get_leaf_names())
        S = list({GeneToSpecies(gene) for gene in G})
        if len(S) >= 4:
            return SupportedHierachies(t, G, S, GeneToSpecies, species, dict_clades, clade_names, treeName)
    return []
Esempio n. 15
0
def ConvertTree(treeString):
    """Renumber sequences per species in a tree with ``iSp_jSeq`` leaf names.

    Each leaf keeps its species prefix; the sequence part is replaced by a
    running counter (0, 1, ...) per species, in leaf iteration order.

    Returns:
        The rewritten tree as a newick string followed by a newline.
    """
    tree = ete2.Tree(treeString)
    next_index = defaultdict(int)
    for leaf in tree:
        # Exactly one underscore is expected; anything else raises here.
        species, _ = leaf.name.split("_")
        leaf.name = "%s_%d" % (species, next_index[species])
        next_index[species] += 1
    return tree.write() + "\n"
Esempio n. 16
0
def RootGeneTreesArbitrarily(treesPat, nOGs, outputDir):
    """Root and fully resolve the gene trees ``treesPat % i`` for i < nOGs.

    Only filenames ending in ".txt" are processed. A tree whose root does not
    already have exactly two children is rooted on its midpoint outgroup, or
    on its first leaf when its total length is zero or all branches have the
    same length. If anything fails, the tree is re-read and rooted on its
    first leaf; if that fails too, the first error is logged to
    ``outputDir + 'root_errors.txt'``. Results are written to ``outputDir``
    under the input's base name.

    Args:
        treesPat: %-format pattern taking the orthogroup index.
        nOGs: number of orthogroups.
        outputDir: output directory prefix (concatenated as-is).
    """
    # Pair each input with its output BEFORE filtering, so the two can never
    # get out of step (the old code zipped a filtered input list against an
    # unfiltered output list).
    jobs = []
    for i in xrange(nOGs):
        treeFN = treesPat % i
        if treeFN.endswith(".txt"):
            jobs.append((treeFN, outputDir + os.path.split(treeFN)[1]))

    nErrors = 0
    with open(outputDir + 'root_errors.txt', 'wb') as errorfile:
        for treeFN, outFN in jobs:
            try:
                t = ete2.Tree(treeFN)
                if len(t.get_children()) != 2:
                    R = t.get_midpoint_outgroup()
                    # A midpoint is meaningless for an all-zero tree or when
                    # every branch has the same length: root on the first
                    # leaf instead (iterating a tree yields its leaves).
                    if GetTotalLength(t) == 0.0 or AllEqualBranchLengths(t):
                        R = next(iter(t))
                    t.set_outgroup(R)
                t.resolve_polytomy()
                t.write(outfile=outFN)
            except Exception as err:
                # Fallback: start over and root arbitrarily on the first leaf.
                try:
                    t = ete2.Tree(treeFN)
                    t.set_outgroup(next(iter(t)))
                    t.resolve_polytomy()
                    t.write(outfile=outFN)
                except Exception:
                    # Report the ORIGINAL failure, not the fallback's.
                    errorfile.write(treeFN + ": " + str(err) + '\n')
                    nErrors += 1
    if nErrors != 0:
        print("WARNING: Some trees could not be rooted")
        print(
            "Usually this is because the tree contains genes from a single species."
        )
Esempio n. 17
0
def reroot(tree_filepath, output_prefix=None, outgroup="IMG_2540341180"):
    """Re-root the tree in ``tree_filepath`` on ``outgroup`` and emit it.

    Args:
        tree_filepath: newick file (or string) accepted by ``ete2.Tree``.
        output_prefix: output file path; when ``None`` the newick is printed
            to stdout instead.
        outgroup: name of the node to use as outgroup. Defaults to the
            previously hard-coded "IMG_2540341180".
    """
    t = ete2.Tree(tree_filepath)

    t.set_outgroup(outgroup)

    if output_prefix is not None:
        t.write(format=0, outfile=output_prefix)
    else:
        # Single-argument form prints identically on Python 2 and 3.
        print(t.write(format=0))

    sys.stderr.write(
        "Reroot completed successfully. You can ignore previous errors.\n")
Esempio n. 18
0
    def build_consensus_tree(self):
        """Build a consensus tree from ``self.cp`` clade statistics.

        Clades whose probability is at least ``self.frequency`` are collected,
        the largest one is taken as the full leaf set, and a star tree over
        those leaves is resolved by ``recursive_builder`` (defined elsewhere).
        Each internal node is then annotated with age mean/median and an
        interval, plus mean/median/interval for every attribute in
        ``self.cp.clade_attributes``; child branch lengths are set from the
        mean ages.

        Returns:
            The annotated ete2 tree.

        NOTE(review): the list comprehensions of the form
        ``for x in 0.05, 0.5, 0.95`` are Python 2 only syntax.
        """

        # Build a list of all clades in the treestream with frequency above the
        # requested threshold, sorted first by size and then by frequency.  Do not
        # include the trivial clade of all leaves.
        clades = []
        for clade, p in self.cp.clade_probs.items():
            if p >= self.frequency:
                # Clade keys are comma-joined leaf names.
                clade = clade.split(",")
                clades.append((len(clade), p, set(clade)))
        clades.sort()
        # The last entry after sorting is the largest clade; it is used as
        # the set of all leaves. Raises IndexError if no clade qualified.
        junk, trash, all_leaves = clades.pop()
        clades.reverse()

        # Start out with a tree in which all leaves are joined in one big polytomy
        t = ete2.Tree()
        for l in all_leaves:
            t.add_child(name=l)

        # Now recursively resolve the polytomy by greedily grouping clades
        t = recursive_builder(t, clades)
        cache = t.get_cached_content()

        # Add age annotations
        for clade in t.traverse("postorder"):
            if clade.is_leaf():
                continue
            # Rebuild the key used by self.cp: sorted leaf names, comma-joined.
            clade_key = ",".join(sorted([l.name for l in cache[clade]]))
            ages = self.cp.clade_ages[clade_key]
            # NOTE(review): under Python 2 this is integer division if the
            # ages are ints - presumably they are floats; confirm.
            mean = sum(ages) / len(ages)
            for c in clade.get_children():
                # Branch length = this clade's mean age minus the distance
                # from the child to its farthest leaf.
                leaf, age = c.get_farthest_leaf()
                c.dist = mean - age
            # Sorts the list stored in self.cp.clade_ages in place.
            ages.sort()
            lower, median, upper = [
                ages[int(x * len(ages))] for x in 0.05, 0.5, 0.95
            ]
            clade.add_feature("age_mean", mean)
            clade.add_feature("age_median", median)
            # NOTE(review): the value stored as "HPD" is the 5%-95% quantile
            # range, while attributes below use 2.5%-97.5% - confirm the
            # difference is intended.
            clade.add_feature("age_HPD", "{%f-%f}" % (lower, upper))

            for f in self.cp.clade_attributes:
                values = self.cp.clade_attributes[f][clade_key]
                # `mean` is reused here; the age mean is no longer needed.
                mean = sum(values) / len(values)
                # Sorts the shared list in place, as above.
                values.sort()
                lower, median, upper = [
                    values[int(x * len(values))] for x in 0.025, 0.5, 0.975
                ]
                clade.add_feature("%s_mean" % f, mean)
                clade.add_feature("%s_median" % f, median)
                clade.add_feature("%s_HPD" % f, "{%f-%f}" % (lower, upper))
        return t
    def reference_ml_tree(self, fasta):
        """Run FastTree on an alignment and write a polytomy-free copy.

        The alignment ``fasta`` is read from ``self.msa_folder``; the tree is
        written to ``self.tree_folder`` as ``<fasta>.fastTree`` and, after
        ``resolve_polytomy``, as ``<fasta>.fastTree-no_polytomies``. The
        process exits with an error message if the command returns non-zero.

        Returns:
            Tuple of the two output file names (without folder).
        """
        alignment_path = '%s/%s' % (self.msa_folder, fasta)
        tree_path = '%s/%s.fastTree' % (self.tree_folder, fasta)

        command = '%s  -wag -gamma -out %s %s' % (
            self.fasttree, tree_path, alignment_path)
        if system(command):
            exit('**Error while running:\n\tfastTree')

        resolved = ete2.Tree(tree_path)
        resolved.resolve_polytomy()
        resolved.write(outfile='%s-no_polytomies' % tree_path)

        raw_name = '%s.fastTree' % fasta
        resolved_name = '%s.fastTree-no_polytomies' % fasta
        return (raw_name, resolved_name)
Esempio n. 20
0
def reroot(tree_filepath, output_prefix=None,
           outgroup_leaves=("IMG_2264867067", "IMG_638154511")):
    """Re-root the tree on the common ancestor of ``outgroup_leaves``.

    Args:
        tree_filepath: newick file (or string) accepted by ``ete2.Tree``.
        output_prefix: output file path; when ``None`` the newick is printed
            to stdout instead.
        outgroup_leaves: node names whose common ancestor becomes the
            outgroup. Defaults to the previously hard-coded pair.
    """
    t = ete2.Tree(tree_filepath)

    ancestor = t.get_common_ancestor(*outgroup_leaves)
    t.set_outgroup(ancestor)

    if output_prefix is not None:
        t.write(format=0, outfile=output_prefix)
    else:
        # Single-argument form prints identically on Python 2 and 3.
        print(t.write(format=0))

    sys.stderr.write(
        "Reroot completed successfully. You can ignore previous errors.\n")
Esempio n. 21
0
 def as_ete_object(o):
     """Convert ``o`` to its ete2 representation.

     An ``ete2.Tree`` is returned unchanged; a dendropy ``Tree`` or ``Node``
     is converted through its newick string; a list or dendropy ``TreeList``
     is converted element by element into a list.

     Raises:
         ValueError: for any other type.
     """
     if isinstance(o, ete2.Tree):
         return o

     if isinstance(o, dendropy.Tree) or isinstance(o, dendropy.Node):
         newick_text = o.as_newick_string() + ";"
         return ete2.Tree(newick_text)

     if isinstance(o, list) or isinstance(o, dendropy.TreeList):
         return [as_ete_object(item) for item in o]

     raise ValueError(
         "Object of type '%s' does not have a native ete2 representation"
         % type(o))
Esempio n. 22
0
def derive_tree_from_splits(current_node, parent_hash, taxon_order, splits):
	"""Recursively grow a bifurcating ete2 tree below ``current_node``.

	The split recorded for ``parent_hash`` in ``splits`` is resolved into two
	child clade hashes. A new node is attached for each; a clade of size one
	names its node after its single taxon, any larger clade is expanded by a
	recursive call.
	"""
	split_hash = splits[parent_hash]
	child1_hash, child2_hash = elucidate_cc_split(parent_hash, split_hash)
	child_hashes = (child1_hash, child2_hash)

	# Create and attach both children first, then measure both clades,
	# and only then descend (first child before second).
	child_nodes = [ete2.Tree() for _ in child_hashes]
	for node in child_nodes:
		current_node.add_child(node)

	child_sizes = [clade_size(clade_hash) for clade_hash in child_hashes]

	for node, clade_hash, size in zip(child_nodes, child_hashes, child_sizes):
		if size == 1:
			node.name = clade_taxon_names(clade_hash, taxon_order)[0]
		else:
			derive_tree_from_splits(node, clade_hash, taxon_order, splits)
Esempio n. 23
0
def draw_tree(ptree, labels=None):
    """Show ``ptree`` in the ete2 viewer, optionally coloured by ``labels``.

    One ete2 node is created per entry of ``ptree.nodes``, named
    ``"<node>[<index>]"``. Node ``i`` is attached below node
    ``ptree.parents[i]`` when that value is > 0, otherwise below a synthetic
    root. Internal nodes show their name above the branch.

    Args:
        ptree: object with ``nodes`` and ``parents`` sequences.
        labels: optional per-node labels. When given, ``"{label}"`` is
            appended to each node name and the node background is coloured
            from ``color_map``. When omitted (or empty) the tree is drawn
            without colours instead of crashing on ``max(None)``.

    NOTE(review): a parent index of 0 is treated as "no parent"; if node 0
    can be a real parent this should probably be ``p >= 0`` - confirm.
    """
    has_labels = labels is not None and len(labels) > 0

    root = ete2.Tree(name='root')
    T = [
        ete2.Tree(name=(str(node) + '[' + str(i) + ']'))
        for i, node in enumerate(ptree.nodes)
    ]
    if has_labels:
        for t, lab in zip(T, labels):
            t.name += '{' + str(lab) + '}'
    for i, p in enumerate(ptree.parents):
        if p > 0:
            T[p].add_child(T[i])
        else:
            root.add_child(T[i])

    if has_labels:
        cmap = color_map(max(labels) + 2)
        for t, l in zip(T, labels):
            ns = ete2.NodeStyle()
            ns['bgcolor'] = cmap[l]
            t.set_style(ns)
            if not t.is_leaf():
                t.add_face(ete2.TextFace(t.name), column=0, position='branch-top')
    else:
        # No labels: keep the name faces, skip the colouring.
        for t in T:
            if not t.is_leaf():
                t.add_face(ete2.TextFace(t.name), column=0, position='branch-top')
    root.show()
Esempio n. 24
0
 def tree_ete(self):
     """Return ``self.tree`` loaded as an ETE2 tree object.

     The tree is rooted on the single node named 'V' (an assertion fails if
     there is not exactly one such node) and then ladderized. Attributes can
     be added to the leaves of the returned object for later comparisons.
     """
     loaded = ete2.Tree(self.tree)

     # Locate the outgroup; it must be unique.
     outgroup_hits = loaded.search_nodes(name='V')
     assert len(outgroup_hits) == 1
     outgroup = outgroup_hits[0]

     loaded.set_outgroup(outgroup)
     loaded.ladderize()
     return loaded
Esempio n. 25
0
def draw_ete2_tree(organism, snplist, tree_file_name, config, c):
    '''Render the phylogenetic tree of an organism with ETE2.

    Every node starts with a red style. For each entry of snplist whose
    first element equals the node name, the style is overridden: green and
    larger when elements 1 and 3 are equal (derived SNP), grey with
    line type 1 when element 3 is "-" (gap). Later entries win.

    Keyword arguments:
    organism -- the organism of which to make a tree
    snplist -- a list of the SNP names, positions and state
    tree_file_name -- path of the rendered output file
    config -- configuration mapping; config["dev"] enables a debug print
    c -- passed through to tree_to_newick

    '''
    black = "#000000"
    grey = "#DDDDDD"
    # Ordered (key, value) pairs so styles are applied in a fixed order.
    default_style = (
        ("fgcolor", "#BE0508"),
        ("size", 10),
        ("vt_line_color", black),
        ("hz_line_color", black),
        ("vt_line_type", 0),
        ("hz_line_type", 0),
        ("vt_line_width", 2),
        ("hz_line_width", 2),
    )
    derived_style = (
        ("fgcolor", "#99FF66"),
        ("size", 15),
        ("vt_line_color", black),
        ("hz_line_color", black),
        ("vt_line_type", 0),
        ("hz_line_type", 0),
    )
    gap_style = (
        ("fgcolor", grey),
        ("size", 10),
        ("vt_line_color", grey),
        ("hz_line_color", grey),
        ("vt_line_type", 1),
        ("hz_line_type", 1),
    )

    newick = tree_to_newick(organism, config, c)
    tree = ete2.Tree(newick, format=1)
    farthest_leaf = tree.get_farthest_leaf()[0]
    tree_depth = int(tree.get_distance(farthest_leaf))

    for n in tree.traverse():
        nstyle = ete2.NodeStyle()
        for key, value in default_style:
            nstyle[key] = value
        for snp in snplist:
            if n.name != snp[0]:
                continue
            if snp[1] == snp[3]:
                # Derived SNP: highlight the node
                overrides = derived_style
            elif snp[3] == "-":
                # SNP missing due to a gap: grey it out
                overrides = gap_style
            else:
                continue
            for key, value in overrides:
                nstyle[key] = value
        n.set_style(nstyle)

    ts = ete2.TreeStyle()
    ts.show_leaf_name = False  # leaf names are added by the layout function
    ts.show_scale = False  # do not show the scale
    ts.layout_fn = CanSNPer_tree_layout  # use the custom layout
    ts.optimal_scale_level = 'full'  # fully expand the branches of the tree
    if config["dev"]:
        print("#[DEV] Tree file: %s" % tree_file_name)
    tree.render(tree_file_name, tree_style=ts, w=tree_depth * 500)
Esempio n. 26
0
	def save_tree_to_file(self,filepath):
		"""Render this tree to ``filepath``, rotated by 90 degrees, 500 wide.

		The newick string from ``make_newick(self)`` is parsed with ete2
		(format=1) and kept on ``self.newick`` before rendering.
		"""
		newick_text = make_newick(self) + ';'
		self.newick = ete2.Tree(newick_text, format=1)

		rotated_style = ete2.TreeStyle()
		rotated_style.rotation = 90

		self.newick.render(filepath, w=500, tree_style=rotated_style)
Esempio n. 27
0
	def __init__(self, newick_strings):
		"""Store ``newick_strings`` and build a topology array for each.

		Every string is parsed with ete2; the sorted leaf names of the first
		tree become ``self.taxon_order``. ``generate_topology_array`` is then
		called with each newick string in turn.
		"""
		self.taxon_order = []
		self.topology_arrays = []
		self.newick_strings = newick_strings
		self.n_topologies = len(self.newick_strings)

		for position, ns in enumerate(self.newick_strings):
			# Every string is parsed, although only the first tree is used.
			tree = ete2.Tree(ns)
			if position == 0:
				self.taxon_order = sorted(tree.get_leaf_names())

			self.generate_topology_array(ns)
Esempio n. 28
0
	def render_tree(self):
		"""Open this tree in the ete2 viewer, rotated by 90 degrees.

		The newick string from ``make_newick(self)`` is parsed with ete2
		(format=8) and kept on ``self.newick`` before it is shown.
		"""
		newick_text = make_newick(self) + ';'
		self.newick = ete2.Tree(newick_text, format=8)

		rotated_style = ete2.TreeStyle()
		rotated_style.rotation = 90

		self.newick.show(tree_style=rotated_style)
Esempio n. 29
0
 def RenameTreeTaxa(self,
                    treeFN,
                    newTreeFilename,
                    idsMap,
                    qFixNegatives=False):
     """Write a copy of the tree in ``treeFN`` with its leaves renamed.

     Args:
         treeFN: input newick file.
         newTreeFilename: output file, written with ete2 ``format=4``.
         idsMap: mapping from current leaf name to new leaf name.
         qFixNegatives: when true, negative branch lengths are set to 0.0.

     This is best-effort: any ordinary error (unreadable tree, leaf missing
     from ``idsMap``, write failure) is swallowed and nothing is written.
     """
     try:
         tree = ete2.Tree(treeFN)
         for node in tree.get_leaves():
             node.name = idsMap[node.name]
         if qFixNegatives:
             for n in tree.traverse():
                 if n.dist < 0.0:
                     n.dist = 0.0
         tree.write(outfile=newTreeFilename, format=4)
     except Exception:
         # Deliberately silent, but no longer a bare `except:` so that
         # KeyboardInterrupt and SystemExit still propagate.
         pass
Esempio n. 30
0
def calculate_topology_probabilities(ts):
	"""Count topologies and conditional-clade splits over the trees in ``ts``.

	For each tree, the bytes of its "f0" column serve as the topology key;
	the first tree seen with a given key supplies the stored newick (written
	with format 9). For each row of the ("f0", "f1") columns, the clade size
	is recorded, and for clades of three or more taxa the (clade, split)
	pair is counted, keeping the first row seen for each pair.

	Returns:
		Tuple ``(topology_set, topology_counts, cc_sets, cc_counts,
		clades_set)`` where the ``*_set`` members are built by
		``TopologyProbabilities``, ``DiscreteProbabilities`` and
		``CladeProbabilities`` respectively.
	"""
	topology_counts = {}
	topology_data = {}
	cc_counts = {}
	cc_data = {}
	clade_sizes = {}

	for tree_index in range(ts.n_trees):
		tree_array = ts.tree_arrays[tree_index]
		# original author's note: the topology hash is the concatenated,
		# sorted clade hashes
		topology_hash = tree_array["f0"].tostring()

		if topology_hash in topology_counts:
			topology_counts[topology_hash] += 1
		else:
			# First occurrence: keep the topology without branch lengths.
			tree_root = ete2.Tree(ts.newick_strings[tree_index])
			topology_data[topology_hash] = tree_root.write(format = 9)
			topology_counts[topology_hash] = 1

		# Only clade and split hashes are needed here, not node heights.
		for node in tree_array[["f0", "f1"]]:
			parent_hash = node[0].tostring() # the hash for the clade
			split_hash = node[1].tostring() # the hash for the bifurcation

			n_node_taxa = clade_size(parent_hash)
			clade_sizes[parent_hash] = n_node_taxa
			if n_node_taxa < 3:
				continue

			# Record the conditional clade.
			split_counts = cc_counts.setdefault(parent_hash, {})
			if split_hash in split_counts:
				split_counts[split_hash] += 1
			else:
				cc_data.setdefault(parent_hash, {})[split_hash] = node
				split_counts[split_hash] = 1

	clades_set = CladeProbabilities(clade_sizes)
	topology_set = TopologyProbabilities(topology_data)

	cc_sets = {}
	for parent_hash, splits_data in cc_data.items():
		cc_sets[parent_hash] = DiscreteProbabilities(splits_data)

	return topology_set, topology_counts, cc_sets, cc_counts, clades_set