Exemple #1
0
def main():
    """Report whether two NEXUS tree files start with the same topology.

    The file paths are taken from ``sys.argv[1]`` and ``sys.argv[2]``.  Both
    files are read into one ``TreeList`` and the trees at
    indices 0 and 1 are compared with ``treecompare.symmetric_difference``;
    a distance of 0 is reported as "identical".

    NOTE(review): index 1 is presumably the first tree of the second file,
    which only holds when the first file contains exactly one tree -- confirm
    against the expected inputs.
    """
    treefile1 = sys.argv[1]
    treefile2 = sys.argv[2]

    treelist = TreeList()
    for treefile in (treefile1, treefile2):
        # The context manager releases each handle as soon as it is parsed.
        # NOTE: the 'U' mode flag is rejected by Python 3.11+; use 'r' there.
        with open(treefile, 'rU') as handle:
            treelist.read(file=handle, schema="nexus")

    distance = treecompare.symmetric_difference(treelist[0], treelist[1])
    if distance == 0:
        print("trees are identical")
    else:
        print("trees are NOT identical")
def readTree(filename, quiet=False):
    """Read a Newick file into a new ``TreeList``.

    Args:
        filename: Path of the Newick tree file to read.
        quiet: When true, suppress the "Reading in files..." banner.

    Returns:
        The ``TreeList`` populated from ``filename``.

    If the file cannot be opened or parsed, an error message naming the file
    is printed and the process is terminated through ``sys.exit()``.
    """
    if not quiet:
        print()
        print("Reading in files...")
        print()

    temp = TreeList()
    try:
        # Opening happens inside the try so a missing file gets the same
        # friendly message as a malformed one; the handle is always closed.
        with open(filename, 'r') as handle:
            temp.read(file=handle, schema="newick", preserve_underscores=True)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit propagate.
        print("Error with file '{}': please only use files with newick tree format".format(filename))
        sys.exit()

    return temp
Exemple #3
0
def readTrees(filenames, namespace, quiet=False):
    """Read each Newick file into its own ``TreeList``.

    Args:
        filenames: Iterable of paths to Newick tree files.
        namespace: Currently unused; every file is read into a ``TreeList``
            built without arguments, so taxon namespaces are not shared
            through this parameter.
        quiet: When true, suppress the "Reading in files..." banner.

    Returns:
        A list with one ``TreeList`` per entry of ``filenames``, in order.

    If any file cannot be opened or parsed, an error message naming the file
    is printed and the process is terminated through ``sys.exit()``.
    """
    if not quiet:
        print()
        print("Reading in files...")
        print()

    sample_tree_list = []
    for f in filenames:
        temp = TreeList()
        try:
            # The context manager closes the handle even when parsing fails.
            with open(f, 'r') as handle:
                temp.read(file=handle,
                          schema="newick",
                          preserve_underscores=True)
        except Exception:
            # Exception (not a bare except) so Ctrl-C and SystemExit propagate.
            print(
                "Error with file '{}': please only use files with newick tree format"
                .format(f))
            sys.exit()

        sample_tree_list.append(temp)
    return sample_tree_list
                redundant_count += 1
                break
        else:
            tree_list.append(tree)
    return tree_list, redundant_count


if __name__ == '__main__':
    # Interactive driver: prompt for the inputs, collect the unique
    # topologies via unique_trees(), report the counts and save the result.
    mle_path = raw_input("File with Maximum Likelihood tree: ")
    mcmc_trees = raw_input("File with MCMC trees: ")
    burnin = int(raw_input("Burnin: "))
    outfile = raw_input("Name of outfile: ")

    # One TaxonSet is handed to both the ML tree and unique_trees().
    taxa = dendropy.TaxonSet()
    mle_tree = dendropy.Tree.get_from_path(mle_path, 'nexus', taxon_set=taxa)

    # The ML tree is the first entry of the unique-topology list.
    uts = [mle_tree]
    uts, redundant_count = unique_trees(
        uts, mcmc_trees, 'nexus', burnin, taxonset=taxa)

    print("\nNumber of redundant trees: %d" % redundant_count)
    print("Number of unique trees: %d\n" % len(uts))

    # Topologies only: edge lengths are suppressed in the Newick output.
    unique_tree_list = TreeList(uts)
    unique_tree_list.write_to_path(
        outfile, 'newick', suppress_edge_lengths=True)
Exemple #5
0
# Simulate `num` contained-coalescent gene trees inside the species tree read
# from `fin` and write them, in Newick format, to `fout`.
fin = sys.argv[1]
num = int(sys.argv[2])
fout = sys.argv[3]

# Read the whole file at once; the context manager closes the handle even
# when reading fails.
with open(fin, "r") as f:
    sp_tree_str = f.read()

# Prefix the "[&R]" rooting token before handing the string to the parser.
sp_tree_str = "[&R] " + sp_tree_str

sp_tree = dendropy.Tree.get_from_string(sp_tree_str,
                                        "newick",
                                        preserve_underscores=True)
# One contained (gene) taxon per taxon of the species tree.
gene_to_species_map = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
    containing_taxon_namespace=sp_tree.taxon_namespace, num_contained=1)
gene_tree_list = TreeList()

for _ in range(num):
    gene_tree = treesim.contained_coalescent_tree(
        containing_tree=sp_tree,
        gene_to_containing_taxon_map=gene_to_species_map)
    # Keep only the first whitespace-separated token of every leaf label.
    for leaf in gene_tree.leaf_nodes():
        leaf.taxon.label = leaf.taxon.label.split()[0]
    gene_tree_list.append(gene_tree)

gene_tree_list.write_to_path(fout, 'newick')
Exemple #6
0
#! /usr/bin/env python

from dendropy import TreeList
from sys import argv

# Convert the Newick tree file named by argv[1] into a NEXUS file at argv[2].
trees = TreeList()

newick_path = argv[1]
trees.read(path=newick_path, schema="newick")

nexus_path = argv[2]
trees.write(path=nexus_path, schema="nexus")
Exemple #7
0
@author: smirarab
'''
import dendropy
import sys
import os
import copy
import os.path
from dendropy import TreeList

if __name__ == '__main__':
    # Pool the trees of every NEXUS file given on the command line and write
    # a random sample of `count` of them, in Newick format, to the output
    # file (or to stdout when the output argument is "-").
    import random

    if len(sys.argv) < 4:
        print("USAGE: count [output|-] treefile*")
        sys.exit(1)

    count = int(sys.argv[1])
    out = open(sys.argv[2], 'w') if sys.argv[2] != "-" else sys.stdout
    try:
        trees = None
        for treeName in sys.argv[3:]:
            # tree_offset=200 is passed for every file -- presumably a fixed
            # burn-in; confirm before reusing with other inputs.
            a = dendropy.TreeList.get_from_path(treeName, 'nexus', rooted=True, tree_offset=200)
            if trees is None:
                trees = a
            else:
                # extend, not append: the pool must hold the individual trees
                # of every file, not a nested TreeList as a single element.
                trees.extend(a)
        samples = TreeList(random.sample(trees, count))
        samples.write(out, 'newick', write_rooting=False)
    finally:
        # Never close stdout; do close a real file even when sampling fails.
        if out is not sys.stdout:
            out.close()
Exemple #8
0
def main(args):
    if len(args) < 2:
        print '''USAGE: %s [tree_file] [outgroups] [-mrca -mrca-dummy (optional)] [output name (optional)] [-igerr (optional)]

-- tree_file: a path to the newick tree file

-- outgroups: a list of outgroups, separated by comma.
The script goes through the list of outgroups. If the outgroup is found in the tree, 
the tree is rooted at that outgroup. Otherwise, the next outgroup in the list is used. 
Each element in the comma-delimited list is itself a + delimited list of taxa.
By default the script makes sure that this list of taxa are monophyletic
in the tree and roots the tree at the node leading to the clade represented 
by outgroups given in the + delimited list.
Alternatively, you can specify -m which will result in mid-point rooting.

Example: HUMAN,ANOCA,STRCA+TINMA first tries to root at HUMAN, if not present, 
tries to use ANOCA, if not present, tries to root at parent of STRCA and TINMA
which need to be monophyletic. If not monophyletic, roots at STRCA.

-- (optional) -mrca: using this option the mono-phyletic requirement is relaxed 
and always the mrca of the + delimited list of outgroups is used.
-- (optional) -mrca-dummy: is like -mrca, but also adds a dummy taxon as outgroup to the root. 
''' % args[0]
        sys.exit(1)
    treeName = args[1]

    outgroups = [x.replace("_", " ") for x in args[2].split(",")]

    # uym2 editted: keep underscore
    #outgroups = [x for x in args[2].split(",")]

    use_mrca = True if len(args) > 3 and (
        args[3] == "-mrca" or args[3] == "-mrca-dummy") else False
    add_dummy = True if len(args) > 3 and (args[3] == "-mrca-dummy") else False
    resultsFile = args[4] if len(args) > 4 else (
        "%s.rooted" %
        treeName[:-9] if treeName.endswith("unrooted") else "%s.rooted" %
        treeName)
    ignore = True if len(args) > 5 and args[5] == "-igerr" else False
    print >> sys.stderr, "Reading input trees %s ..." % treeName,
    #trees = dendropy.treelist.get_from_path(treename, 'newick',rooted=true)
    # uym2 edited: hack for dendropy4
    trees = dendropy.TreeList.get_from_path(treeName, "newick")
    print >> sys.stderr, "%d tree(s) found" % len(trees)
    i = 0
    outtrees = TreeList()
    for tree in trees:
        i += 1
        print >> sys.stderr, ".",
        oldroot = tree.seed_node
        #print "Tree %d:" %i
        if outgroups[0] == "-m":
            print >> sys.stderr, "Midpoint rooting ... "
            tree.reroot_at_midpoint(update_splits=False)
        else:
            mrca = None
            for outgroup in outgroups:
                outs = outgroup.split("+")
                outns = []
                for out in outs:
                    n = tree.find_node_with_taxon_label(out)
                    if n is None:
                        print >> sys.stderr, "outgroup not found %s," % out,
                        continue
                    outns.append(n.taxon)
                if len(outns) != 0:
                    # Find an ingroup and root the tree there
                    for n in tree.leaf_node_iter():
                        if n.taxon not in outns:
                            ingroup = n
                            break
                    #print "rerooting at ingroup %s" %ingroup.taxon.label
                    '''reroot at an ingroup, so that outgroups form monophyletic groups, if possible'''
                    if ingroup.edge.length is not None:
                        #tree.reroot_at_edge(ingroup.edge, update_splits=True,length1=ingroup.edge.length/2,length2=ingroup.edge.length/2)
                        # uym2 editted: hack for dendropy4
                        tree.reroot_at_edge(ingroup.edge,
                                            length1=ingroup.edge.length / 2,
                                            length2=ingroup.edge.length / 2)
                    else:
                        #tree.reroot_at_edge(ingroup.edge, update_splits=True)
                        tree.reroot_at_edge(ingroup.edge)

                    mrca = tree.mrca(taxa=outns)
                    break
            if mrca is None:
                if ignore:
                    print >> sys.stderr, "Outgroups not found: %s" % outgroups
                    print >> sys.stdout, tree.as_string(schema="newick"),
                    continue
                else:
                    print >> sys.stderr, "Outgroups not found: %s" % outgroups
                    continue
                    #raise KeyError("Outgroups not found %d: %s" %(i,outgroups))
            #print mrca.leaf_nodes()
            #if not mono-phyletic, then use the first
            if not use_mrca and len(mrca.leaf_nodes()) != len(outns):
                print >> sys.stderr, "selected set is not monophyletic. Using %s instead. " % outns[
                    0]
                mrca = tree.find_node_with_taxon_label(outns[0].label)
            if mrca.parent_node is None:
                print >> sys.stderr, "Already rooted at the root."
                #print "rerooting on %s" % [s.label for s in outns]
                #tree.reroot_at_midpoint()
            elif mrca.edge.length is not None:
                #print "rerooting at %s" %mrca.as_newick_string()
                if ingroup.edge.length is not None:
                    #tree.reroot_at_edge(mrca.edge, update_splits=False,length1=mrca.edge.length/2,length2=mrca.edge.length/2)
                    #uym2 editted: hack for dendropy4
                    tree.reroot_at_edge(mrca.edge,
                                        length1=mrca.edge.length / 2,
                                        length2=mrca.edge.length / 2)
                else:
                    #tree.reroot_at_edge(mrca.edge, update_splits=False)
                    #uym2 editted: hack for dendropy4
                    tree.reroot_at_edge(mrca.edge)
            else:
                tree.reroot_at_edge(mrca.edge, update_splits=False)
            if add_dummy:
                dummy = tree.seed_node.new_child(taxon=Taxon(label="outgroup"),
                                                 edge_length=1)
                tree.reroot_at_edge(dummy.edge, update_splits=False)
            outtrees.append(tree)
        '''This is to fix internal node labels when treated as support values'''
        while oldroot.parent_node != tree.seed_node and oldroot.parent_node != None:
            oldroot.label = oldroot.parent_node.label
            oldroot = oldroot.parent_node
        if len(oldroot.sister_nodes()) > 0:
            oldroot.label = oldroot.sister_nodes()[0].label
            #tree.reroot_at_midpoint(update_splits=False)

    print >> sys.stderr, "writing results to %s" % resultsFile
    #outtrees.write(open(resultsFile,'w'),'newick',edge_lengths=True, internal_labels=True,write_rooting=False)
    #uym2 editted: hack for dendropy4
    outtrees.write(
        path=resultsFile, schema='newick', suppress_rooting=True
    )  #,edge_lengths=True, internal_labels=True,write_rooting=False)
Exemple #9
0
def trees_from_newick_str_list(newick_list):
    """Build one ``TreeList`` from a list of Newick strings.

    The strings are joined with single spaces, wrapped in a ``StringIO``
    stream and parsed with the "NEWICK" schema against a newly created
    ``TaxonSet``.
    """
    newick_stream = StringIO(" ".join(newick_list))
    fresh_taxa = TaxonSet()
    return TreeList(stream=newick_stream, taxon_set=fresh_taxa, schema="NEWICK")
Exemple #10
0
from argparse import ArgumentParser
from dendropy import TreeList, TaxonNamespace
from dendropy.simulate import treesim
import os

# Simulate three sets of 100 trees with `-n` tips each (two birth-death
# settings and a pure Kingman coalescent) and write them as Newick files
# into the directory given by `-d`.

# The text is the program description; passed positionally it would be taken
# as the program name shown in the usage line.
parser = ArgumentParser(
    description='Generate trees of a given size with different algos')
parser.add_argument('-n', type=int, help='Tree size', default=100)
# Required: every output file is written relative to this directory.
parser.add_argument('-d', type=str, help='Output directory', required=True)
args = parser.parse_args()

if not os.path.isdir(args.d):
    # makedirs also creates missing parent directories.
    os.makedirs(args.d)
os.chdir(args.d)
bd2 = TreeList([
    treesim.birth_death_tree(birth_rate=1.0,
                             death_rate=0.5,
                             num_extant_tips=args.n,
                             repeat_until_success=True) for _ in range(100)
])
bd2.write_to_path('birth_death2.nwk', schema='newick')
bd5 = TreeList([
    treesim.birth_death_tree(birth_rate=1.0,
                             death_rate=0.2,
                             num_extant_tips=args.n,
                             repeat_until_success=True) for _ in range(100)
])
bd5.write_to_path('birth_death5.nwk', schema='newick')
# The Kingman trees share one namespace with taxa labelled T1..Tn.
taxa = TaxonNamespace(['T{}'.format(x) for x in range(1, args.n + 1)])
king = TreeList(
    [treesim.pure_kingman_tree(taxon_namespace=taxa) for _ in range(100)])
king.write_to_path('kingman.nwk', schema='newick')