def main():
    """Summarise, draw and compare the example trees, then show one clade.

    Prints ``tree_info`` for the first tree in ``PATH_EXAMPLE`` and draws it,
    prints the Robinson-Foulds and Euclidean distances between the trees in
    ``PATH_EXAMPLE`` and ``PATH_BIF``, and finally draws the common ancestor
    of the nodes named "C" and "D", coloured blue.
    """
    # Only the first tree yielded by the parser is used.
    tree_object = next(Phylo.parse(PATH_EXAMPLE, 'newick'))

    print(tree_info(tree_object))
    Phylo.draw(tree_object)

    # Both DendroPy trees are read into one shared taxon namespace.
    shared_taxa = dendropy.TaxonNamespace()
    first_tree, second_tree = (
        Tree.get_from_path(source, 'newick', taxon_namespace=shared_taxa)
        for source in (PATH_EXAMPLE, PATH_BIF)
    )

    euclidean = treecompare.euclidean_distance(first_tree, second_tree)
    robinson_foulds = treecompare.robinson_foulds_distance(first_tree,
                                                           second_tree)
    print("Robinson Foulds distance: ", robinson_foulds)
    print("Euclidean distance: ", euclidean)

    # Clade rooted at the common ancestor of the nodes named "C" and "D".
    clade = tree_object.common_ancestor({"name": "C"}, {"name": "D"})
    clade.color = "blue"
    print("COMMON ANCESTOR: ", clade)
    Phylo.draw(clade)
Exemple #2
0
    def calculate_robinson_foulds(self, species_tree, gene_tree, weighted):
        """
        Compute Robinson Foulds distance(s) between a species tree and a
        gene tree.

        Input:
        species_tree -- path of a newick file, or a newick string, holding
                        the species tree
        gene_tree    -- path of a newick file, or a newick string, holding
                        the tree compared against the species tree
        weighted     -- truthy to also compute the weighted distance

        Returns:
        A ``(weighted, unweighted)`` tuple when ``weighted`` is truthy,
        otherwise only the unweighted distance.
        """
        # One namespace shared by both trees.
        shared_taxa = dendropy.TaxonNamespace()

        def as_tree(source):
            # An existing file is read from disk; anything else is parsed
            # as a newick string.
            if os.path.isfile(source):
                reader = Tree.get_from_path
            else:
                reader = Tree.get_from_string
            return reader(source, 'newick', taxon_namespace=shared_taxa)

        species_tree = as_tree(species_tree)
        gene_tree = as_tree(gene_tree)

        if not weighted:
            return treecompare.unweighted_robinson_foulds_distance(
                species_tree, gene_tree)

        weighted_distance = treecompare.weighted_robinson_foulds_distance(
            species_tree, gene_tree)
        unweighted_distance = treecompare.unweighted_robinson_foulds_distance(
            species_tree, gene_tree)
        return weighted_distance, unweighted_distance
Exemple #3
0
def read_lsd_results(inputDir):
    """Build an initial parameter vector from LSD output found in *inputDir*.

    Expects ``mytree.tre`` (input tree), ``mytree.tre.result`` (log) and
    ``mytree.tre.result.newick`` (result tree) inside *inputDir*.

    Returns:
        A list of ``2 * n - 2`` per-branch values (``n`` = number of leaves)
        followed by the rate read from the log. Each per-branch value starts
        at ``1e-10`` and is replaced by ``result_length / input_length`` for
        branches where both lengths are positive.

    Raises:
        ValueError: if the log has no ``"Tree 1 rate "`` marker or no number
            follows it.
        KeyError: if the result tree has a bipartition missing from the
            input tree.
    """
    # suppose LSD was run on the "mytree.newick" and all the outputs are placed inside inputDir
    log_file = normpath(join(inputDir, "mytree.tre.result"))
    input_tree_file = normpath(join(inputDir, "mytree.tre"))
    result_tree_file = normpath(join(inputDir, "mytree.tre.result.newick"))

    # Close the log promptly instead of leaking the handle.
    with open(log_file, 'r') as log:
        s = log.read()

    marker = "Tree 1 rate "
    start = s.find(marker)
    if start < 0:
        # find() returns -1; without this check the scan would start at an
        # arbitrary offset and parse unrelated text.
        raise ValueError("'%s' not found in %s" % (marker, log_file))

    # Scan digits with at most one decimal point, never past the end of s.
    begin = start + len(marker)
    end = begin
    found_dot = False
    while end < len(s) and (s[end] in "0123456789"
                            or (s[end] == '.' and not found_dot)):
        if s[end] == '.':
            found_dot = True
        end += 1
    mu = float(s[begin:end])

    taxa = TaxonNamespace()
    tree = Tree.get_from_path(input_tree_file,
                              schema="newick",
                              taxon_namespace=taxa,
                              rooting="force-rooted")
    tree.encode_bipartitions()
    n = sum(1 for _ in tree.leaf_node_iter())
    N = 2 * n - 2
    x0 = [10**-10] * N + [mu]

    # Map every non-root bipartition of the input tree to (slot, length).
    brlen_map = {}
    idx = 0
    for node in tree.postorder_node_iter():
        if node is not tree.seed_node:
            brlen_map[node.bipartition] = (idx, node.edge_length)
            idx += 1

    tree2 = Tree.get_from_path(result_tree_file,
                               schema="newick",
                               taxon_namespace=taxa,
                               rooting="force-rooted")
    tree2.encode_bipartitions()

    for node in tree2.postorder_node_iter():
        if node is tree2.seed_node:
            continue
        idx, el = brlen_map[node.bipartition]
        new_el = node.edge_length
        # A missing length is treated like a non-positive one: keep the default.
        if el is None or new_el is None:
            continue
        if el > 0 and new_el > 0:
            x0[idx] = new_el / float(el)

    return x0
Exemple #4
0
def main():
    """Run ``deviation_from_clock`` on a Newick tree and write the tree out.

    ``argv[1]`` is the input tree path; ``argv[2]`` is the output path.
    """
    from sys import argv

    clock_tree = Tree.get_from_path(argv[1], "newick")

    # Disabled: read sampling times from argv[2].
    # smpl_times = {}
    # with open(argv[2],"r") as fin:
    #     fin.readline()
    #     for line in fin:
    #         name,time = line.split()
    #         smpl_times[name] = float(time)

    f = deviation_from_clock(clock_tree)

    clock_tree.write_to_path(argv[2], "newick")

    # Disabled: dump normalised inverse factors.
    # m=sum([1/x for x in f])/len(f)
    # with open('f.txt','w') as fout:
    #     for x in f:
    #         fout.write(str(1/x/m) + "\n")

    #f = calibrate_with_sampling_time(tree,smpl_times)
    #f = calibrate_tree(tree)
    #print(f)
    '''
def main():
    """Run phylobayes-MPI (``pb_mpi`` then ``readpb_mpi``) on an alignment.

    Command line: ``cpu job_name alignment_file tree_file``.

    The tree is rewritten to ``temp.tre`` as an unrooted tree before
    ``pb_mpi`` is launched.

    Raises:
        AssertionError: if an input path argument is absent and the external
            ``restart`` flag is not ``True``, or if either external program
            exits with a non-zero status.
    """
    cpu = sys.argv[1]
    job_name = sys.argv[2]

    # Only a missing positional argument is tolerated here; anything else
    # (for example KeyboardInterrupt) must propagate instead of being swallowed.
    try:
        alnfile = sys.argv[3]
    except IndexError:
        assert(restart is True), "Specified alignment file does not exist. Path?"
    try:
        treefile = sys.argv[4]
    except IndexError:
        assert(restart is True), "Specified tree file does not exist. Path?"

    # Rewrite tree to create trifurcating root, as needed by phylobayes mpi
    tree = Tree.get_from_path(treefile, "newick", rooting = "force-unrooted")
    tree.resolve_polytomies() # in case of polytomies.
    tree.update_bipartitions() # this will create a trifurcating root on an unrooted tree
    tstring = str(tree).replace('[&U] ', '')
    with open('temp.tre', 'w') as tf:
        tf.write(tstring + ';\n')

    # Phylobayes is run to chain length 5500, sampling every 5 to yield 1100. Later, burnin of 100 is removed to get a final posterior n=1000 (same procedure as Rodrigue 2013 Genetics)
    # Argument lists (no shell) keep paths with spaces or shell
    # metacharacters intact.
    pb_call = ["mpirun", "-np", str(cpu), "./pb_mpi", "-mutsel", "-cat",
               "-d", alnfile, "-T", "temp.tre", "-x", "5", "1100", job_name]
    run_pb_call = subprocess.call(pb_call)
    assert( run_pb_call == 0 ), "pb_mpi didn't run!"

    # Parse output with readpb_mpi, using a burnin of 100 and saving everything else (posterior size = 1000)
    readpb_call = ["mpirun", "-np", str(cpu), "./readpb_mpi",
                   "-x", "100", "1", "-1", job_name]
    run_readpb_call = subprocess.call(readpb_call)
    assert( run_readpb_call == 0 ), "readpb_mpi didn't run!"
def readTreeFromFile( treePath):
    '''
    Parse the newick file at treePath, keeping comment metadata and
    NHX-style annotations.

    input: path to the file containing newick tree
    return Tree object
    '''
    return Tree.get_from_path(
        treePath,
        'newick',
        annotations_as_nhx=True,
        extract_comment_metadata=True,
        suppress_annotations=False,
    )
		def __init__(self,ddpTree=None,tree_file=None,schema="newick",Tree_records=None):
			"""Wrap a DendroPy tree given directly or read from *tree_file*.

			tree_file takes precedence over ddpTree when it is truthy.
			Tree_records defaults to a fresh list per instance; a list
			literal in the signature would be shared by every instance
			created without the argument.
			"""
			if tree_file:
				self.ddpTree = Tree.get_from_path(tree_file,schema)
			else:
				#self.ddpTree = copy.deepcopy(ddpTree)
				self.ddpTree = ddpTree
			self.Tree_records = [] if Tree_records is None else Tree_records
 def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
     """Store *ddpTree*, or read the tree from *tree_file* when it is falsy.

     Underscores in labels are preserved when reading from file.
     """
     if not ddpTree:
         ddpTree = Tree.get_from_path(tree_file,
                                      schema,
                                      preserve_underscores=True)
     self.ddpTree = ddpTree
def get_tree_lines(Tname):
    """Return the newick string of each child of each internal node.

    Internal nodes of the newick tree in file *Tname* are visited in
    post-order; children are listed in the order ``child_nodes()`` gives.
    """
    from dendropy import Tree

    parsed = Tree.get_from_path(Tname, "newick")
    return [
        child.as_newick_string()
        for parent in parsed.postorder_internal_node_iter()
        for child in parent.child_nodes()
    ]
Exemple #10
0
def tree_compare(tempdir):
    """Return symmetric differences of two trees against a reference tree.

    Reads ``ref.tree``, ``normal_tree`` and ``red_tree`` from *tempdir* into
    one shared taxon namespace.

    Returns:
        ``(distance_normal, distance_reduced)``: reference vs. normal tree
        and reference vs. reduced tree.
    """
    # CHANGE to tempdir
    tns = dendropy.TaxonNamespace()
    reference, normal, reduced = [
        Tree.get_from_path(tempdir + suffix, "newick", taxon_namespace=tns)
        for suffix in ("/ref.tree", "/normal_tree", "/red_tree")
    ]
    for loaded in (reference, normal, reduced):
        loaded.encode_bipartitions()

    distance_normal = treecompare.symmetric_difference(reference, normal)
    distance_reduced = treecompare.symmetric_difference(reference, reduced)
    return distance_normal, distance_reduced
Exemple #11
0
 def _read_tree_from_path(path, taxon_namespace):
     """
     Read the newick file at *path* into a dendropy tree bound to
     *taxon_namespace*.
     """
     # get_from_path is called on the class; no throwaway Tree instance is
     # needed just to reach it.
     return Tree.get_from_path(path,
                               "newick",
                               taxon_namespace=taxon_namespace)
        def __init__(self,ddpTree=None,tree_file=None,schema="newick",Tree_records=None):
                """Wrap a DendroPy tree and initialise the search state.

                tree_file takes precedence over ddpTree when it is truthy.
                Tree_records defaults to a fresh list per instance; a list
                literal in the signature would be shared across instances.
                opt_root starts at the tree's seed node.
                """
                if tree_file:
                        self.ddpTree = Tree.get_from_path(tree_file,schema)
                else:
                        #self.ddpTree = copy.deepcopy(ddpTree)
                        self.ddpTree = ddpTree
                self.Tree_records = [] if Tree_records is None else Tree_records
                self.min_MD = None
                self.opt_root = self.ddpTree.seed_node
                self.opt_x = 0
Exemple #13
0
def main():
    """Print every item ``resolve_tree`` yields for the tree in ``argv[1]``."""
    from sys import argv

    source_tree = Tree.get_from_path(argv[1], "newick")

    for resolved in resolve_tree(source_tree):
        print(resolved)
def recom_resultFig_dm(recom_prob, mixtureProb):
    """Build a 0/1 site-by-node matrix, plot one row per node, and return it.

    Reads the module-level names ``alignment_len``, ``nodes_number``,
    ``tips_num``, ``tree_path`` and ``alignment``.

    recom_prob: indexed as ``recom_prob[column][i]`` with the columns
        'recom_nodes', 'target_node' and 'posterior'
        (presumably a pandas DataFrame -- TODO confirm against caller).
    mixtureProb: threshold compared against ``posterior[i][site][1]``.

    Side effect: saves the figure as "PhyloHMM_Recombination_two.jpeg".
    Returns the ``(alignment_len, nodes_number)`` array of zeros and ones.
    """
    output = np.zeros((alignment_len, nodes_number))
    for i in range(len(recom_prob)):
        # Rows whose recom node index is below tips_num: mark every site
        # whose posterior reaches the threshold.
        if (recom_prob['recom_nodes'][i] < tips_num):
            for j in range(alignment_len):
                if (recom_prob['posterior'][i][j][1] >= mixtureProb):
                    output[j, recom_prob['recom_nodes'][i]] = 1
        else:
            # for j in range(alignment_len):
            #     if (recom_prob['posterior'][i][j][1] >= mixtureProb):
            #         output[j, recom_prob['target_node'][i]] = 1
            # Otherwise look for a later row j that is the mirror of row i
            # (recom/target swapped) and mark sites where BOTH rows reach
            # the threshold.
            for j in range(i + 1, len(recom_prob)):
                if (recom_prob['recom_nodes'][i]
                        == recom_prob['target_node'][j]) and (
                            recom_prob['recom_nodes'][j]
                            == recom_prob['target_node'][i]):
                    for k in range(alignment_len):
                        if ((recom_prob['posterior'][i][k][1] >= mixtureProb)
                                and
                            (recom_prob['posterior'][j][k][1] >= mixtureProb)):
                            output[k, recom_prob['target_node'][i]] = 1
                        # if (recom_prob['posterior'][i][k] < recom_prob['posterior'][j][k]):
                        #   recom_prob['posterior'][i][k] = recom_prob['posterior'][j][k]
                        # if (recom_prob['posterior'][i][k] >= mixtureProb):
                        #     output[k, recom_prob['target_node'][i]] = 1

    fig = plt.figure(figsize=(tips_num + 9, tips_num / 2))
    color = ['red', 'green', 'purple', 'blue', 'black']
    clonaltree = Tree.get_from_path(tree_path, 'newick')
    set_index(clonaltree, alignment)
    # One subplot per node; colours cycle through the five entries above.
    for i in range(nodes_number):
        ax = fig.add_subplot(nodes_number, 1, i + 1)
        if i >= tips_num:
            # Index at or above tips_num: label with the descendants
            # returned by give_descendents.
            desc = set()
            d = give_descendents(clonaltree, i, desc)
            ax.plot(output[:, i],
                    label=str(i) + ' is mrca:' + str(d),
                    color=color[i % 5])
        else:
            ax.plot(output[:, i],
                    label=give_taxon(clonaltree, i),
                    color=color[i % 5])
        ax.legend(bbox_to_anchor=(0.045, 1.5), prop={'size': 10})
        ax.set_frame_on(False)
        ax.axis('off')

    # Re-enable the axis on the last subplot only.
    # NOTE(review): ax is unbound here if nodes_number is 0.
    ax.axis('on')
    ax.set_yticklabels([])
    plt.savefig("PhyloHMM_Recombination_two.jpeg")
    # plt.show()

    return output
Exemple #15
0
def main():
    """Load a tree and its sampling times, then run ``log_from_random_init``.

    ``argv[1]`` is a newick tree; ``argv[2]`` is a whitespace-separated
    ``taxon time`` table whose first line is skipped.
    """
    from sys import argv

    tree = Tree.get_from_path(argv[1], 'newick')

    with open(argv[2], 'r') as table:
        table.readline()  # first line is not data
        sampling_time = {
            taxon: float(time)
            for taxon, time in (row.split() for row in table)
        }

    x_best = log_from_random_init(tree, sampling_time)
Exemple #16
0
def scale_tree_branch(tree, format="newick"):
    """Return the tree as a newick string, shrinking long branches.

    tree: a ``Tree`` instance, a path to an existing tree file, or a tree
        string in the given *format*. A ``Tree`` instance is modified in
        place.
    format: schema name passed to the DendroPy readers.

    If any edge is longer than 1, every edge length is divided by 100.

    Raises:
        TypeError: if *tree* is none of the supported input kinds.
    """
    # The Tree check must come first: os.path.exists() rejects arbitrary
    # objects, and the instance itself (not the Tree class) must be kept.
    if isinstance(tree, Tree):
        tree_obj = tree
    elif os.path.exists(tree):
        tree_obj = Tree.get_from_path(tree, format)
    elif isinstance(tree, str):
        tree_obj = Tree(stream=StringIO(tree), schema=format)
    else:
        raise TypeError("tree must be a Tree, a file path or a tree string")

    # Edges without a length are ignored rather than compared to a number.
    has_long_edge = any(e.length is not None and e.length > 1
                        for e in tree_obj.postorder_edge_iter())
    if has_long_edge:
        for e in tree_obj.postorder_edge_iter():
            if e.length is not None:
                e.length = e.length/100
    return tree_obj.as_newick_string()
Exemple #17
0
def main():
    """Relabel input trees with labels taken from a reference tree.

    Options: ``-i`` whitespace-separated input files (one newick tree per
    line), ``-o`` either a single output file or one per input file, ``-r``
    the reference tree. Prints a message and does nothing when the number of
    output files is neither 1 nor the number of input files.
    """
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument("-i","--input",required=True,help="Input trees")
    parser.add_argument("-o","--output",required=True,help="Output trees")
    parser.add_argument("-r","--ref",required=True,help="Reference tree")

    args = vars(parser.parse_args())

    inputfiles = args["input"].split()
    outputfiles = args["output"].split()
    refFile = args["ref"] if args["ref"] else None

    if not (len(outputfiles) == 1 or len(outputfiles) == len(inputfiles)):
        print("The number of output files must either be 1 or the same as the number of input files!")
        return

    multi_output = len(outputfiles) > 1

    taxa = TaxonNamespace()

    tree = Tree.get_from_path(refFile,"newick",taxon_namespace=taxa,rooting="force-rooted")
    label_mapping = read_label_from_reference_tree(tree)

    # Although using TreeList provided in Dendropy can be a more convenient solution,
    # I opted out for that because it requires storing a large number of trees in the memory at the same time
    # If the input trees are big then we will run out of memory
    # Had problem with a set of 7k trees of 10k leaves which required >60G of memory just to store the trees
    # Here I read each tree and label it one-by-one.
    # Just have to be thoughtful about making the taxon_namespace shared among all the trees
    shared_out = None if multi_output else open(outputfiles[0],'w')
    try:
        for i,filein in enumerate(inputfiles):
            fout = open(outputfiles[i],'w') if multi_output else shared_out
            try:
                with open(filein,'r') as fin:
                    # Stream line by line so the whole file is never held
                    # in memory at once.
                    for s in fin:
                        tree = Tree.get(data=s,schema="newick",taxon_namespace=taxa,rooting="force-rooted")
                        label_tree(tree,label_mapping)
                        fout.write(tree.as_string("newick"))
            finally:
                # A per-input output file is closed even if labelling fails.
                if multi_output:
                    fout.close()
    finally:
        if shared_out is not None:
            shared_out.close()
Exemple #18
0
def main():
    """Load a tree and its sampling times, then call ``random_date_init``.

    ``argv[1]`` is a newick tree; ``argv[2]`` is a whitespace-separated
    ``taxon time`` table whose first line is skipped.
    """
    tree_file, sampling_time_file = argv[1], argv[2]

    tree = Tree.get_from_path(tree_file, "newick")

    sampling_time = {}
    with open(sampling_time_file, 'r') as table:
        table.readline()  # first line is not data
        for row in table:
            fields = row.split()
            taxon, time = fields
            sampling_time[taxon] = float(time)

    random_date_init(tree, sampling_time, 10, min_nleaf=8)
    '''        
def returnRootOfTree( infile, filePrefix, ext):
    '''
    Read the newick tree "<dir of infile>/<filePrefix>.<ext>" and return the
    node string of its first internal node at level 0.

    input: path to a file in the tree's directory, the tree file prefix and
           its extension
    return root of the Tree as a string, or '' if no such node is found
    '''
    treePath = (os.path.dirname(os.path.realpath(infile))
                + '/' + filePrefix + '.' + ext)
    myTree = Tree.get_from_path(treePath,
                                'newick',
                                annotations_as_nhx=True,
                                extract_comment_metadata=True,
                                suppress_annotations=False)
    for candidate in myTree.internal_nodes():
        if candidate.level() != 0:
            continue
        return candidate.get_node_str()
    return ''
def changeSpeciesTreeLabels(stree):
    '''
        Rename the leaves of the species tree in file stree to 1, 2, 3, ...

        Writes "<stree>.labels" (old label, tab, new label per line) and
        "<stree>.newNewick" (the relabelled tree), printing the tree plot
        before and after the renaming.
    '''
    myStree = Tree.get_from_path(stree,
                                 'newick',
                                 annotations_as_nhx=True,
                                 extract_comment_metadata=True,
                                 suppress_annotations=False)
    myStree.print_plot()

    with open(stree+'.labels', 'w') as mapping_file:
        for number, leaf in enumerate(myStree.leaf_nodes(), 1):
            new_label = str(number)
            mapping_file.write(leaf.taxon.label +'\t'+ new_label +'\n')
            leaf.taxon.label = new_label

    myStree.print_plot()

    with open(stree+'.newNewick', 'w') as tree_file:
        tree_file.write(myStree.as_string('newick'))
Exemple #21
0
def g(x):
    """Load the newick tree at *x* and rescale it to mean branch length 1.

    The root's edge length is set to None; every other edge length is
    divided by the mean of the non-root edge lengths.

    Returns the rescaled tree.
    Raises AssertionError if the rescaled mean is not within 0.01 of 1.
    """
    from dendropy import Tree
    t = Tree.get_from_path(x, 'newick')
    # normalize branch lengths
    # first make sure the root has an edge length of None
    num_edges = 0
    scale = 0.
    for n in t.nodes():
        if n.parent_node is None:
            n.edge_length = None
        else:
            num_edges += 1
            scale += n.edge_length
    scale /= num_edges
    for n in t.nodes():
        if n.edge_length is not None:
            n.edge_length /= scale
    # abs() is required: without it any mean below 1 passes the check.
    assert abs(t.length()/num_edges - 1.) < 0.01
    return t
Exemple #22
0
def evaluate(ref, file_name):
    """Return the symmetric difference between a reference and a built tree.

    Runs ``fastprot`` on *file_name* and ``fnj`` on its output, then
    compares the resulting newick tree with the newick tree at *ref*.
    Uses the module-level taxon namespace ``tns``. Both temporary files are
    removed before returning.
    """
    # To store the data during the process, we create two temporary files.
    fd1, tmp1_path = tempfile.mkstemp()
    # Only the path of the first file is used, so release its descriptor.
    os.close(fd1)
    fd2, tmp2_path = tempfile.mkstemp()
    result_stream = os.fdopen(fd2)

    try:
        # Use the commands of fastprot and fnj.
        # The output of the FastPhylo programs is in file 'tmp2'.
        os.system("fastprot -m -o " + tmp1_path + " " + file_name)
        os.system("fnj -O newick -m FNJ -o " + tmp2_path + " " + tmp1_path)

        #Use Dendropy to compare the trees.
        in_tree = Tree.get_from_stream(result_stream,
                                       schema='newick',
                                       taxon_namespace=tns)
        ref_tree = Tree.get_from_path(ref, schema='newick', taxon_namespace=tns)
        return treecompare.symmetric_difference(ref_tree, in_tree)
    finally:
        # Close the descriptor and delete both temp files, even on failure.
        result_stream.close()
        for leftover in (tmp1_path, tmp2_path):
            try:
                os.remove(leftover)
            except OSError:
                pass
Exemple #23
0
def make_recombination_trees(tree_path, tree, dna, target_node, nu):
    """Return the rerooted tree plus one evolved tree per child of the target.

    *tree* is rerooted at *target_node* in place and its newick string is
    the first result. For each child of *target_node* a fresh copy is read
    from *tree_path*, indexed with ``set_index``, rerooted, and handed to
    ``tree_evolver_rerooted`` together with the node whose ``index`` equals
    the child's and *nu*.
    """
    tree.reroot_at_node(target_node,
                        update_bipartitions=False,
                        suppress_unifurcations=True)
    recombination_trees = [tree.as_string(schema="newick")]

    for child in target_node.child_node_iter():
        fresh_tree = Tree.get_from_path(tree_path, 'newick')
        set_index(fresh_tree, dna)
        fresh_tree.reroot_at_node(target_node,
                                  update_bipartitions=False,
                                  suppress_unifurcations=True)

        def has_child_index(n):
            # Match the node carrying the same index as the current child.
            return hasattr(n, 'index') and n.index == child.index

        recombined_node = fresh_tree.find_node(filter_fn=has_child_index)
        evolved = tree_evolver_rerooted(fresh_tree, recombined_node, nu)
        recombination_trees.append(evolved)

    return recombination_trees
def extract_tree_info(file):
    """Return summary statistics of the newick tree in *file* as strings.

    Returns:
        ``(tree_length, mean_rtt, mean_pairwise)``: total tree length, mean
        root-to-tip distance over the leaves, and mean patristic distance
        over taxon pairs.
    """
    t = Tree.get_from_path(file, 'newick')

    # tree length
    tree_length = str(t.length())

    # mean root-to-tip distance
    root_to_tip = [leaf.distance_from_root() for leaf in t.leaf_nodes()]
    mean_rtt = str(np.mean(root_to_tip))

    # mean patristic distance
    # NOTE(review): the slice starts at i, so each taxon is also paired
    # with itself -- confirm this is intended.
    patristic = treemeasure.PatristicDistanceMatrix(tree=t)
    taxa = t.taxon_namespace
    pairwise = [
        float(patristic(first, second))
        for position, first in enumerate(taxa)
        for second in taxa[position:]
    ]
    mean_pairwise = str(np.mean(pairwise))

    return tree_length, mean_rtt, mean_pairwise
# Script: read a newick tree, run filter_branch on it with the given
# thresholds, and write the tree to the output path.
parser = argparse.ArgumentParser()

parser.add_argument('-i', '--input', required=True, help="input file")
parser.add_argument('-o', '--outfile', required=True, help="output file")
parser.add_argument('-u',
                    '--unit',
                    required=False,
                    help="unit-length for unit-based filter")
parser.add_argument('-l', '--lowthres', required=False, help="low threshold")
parser.add_argument('-g', '--highthres', required=False, help="high threshold")
parser.add_argument('-f', '--factor', required=False, help="factor")

# Parsed options as a plain dict keyed by the long option name.
args = vars(parser.parse_args())

infile = args['input']
outfile = args['outfile']
a_tree = Tree.get_from_path(infile, "newick", preserve_underscores=True)
# NOTE(review): `unit` is assigned here but the call below passes
# args['unit'] directly, so this variable is unused in the visible code.
unit = args['unit'] if args['unit'] else None
# Defaults when the option is absent: low=0, high=1, factor=1.
low = float(args['lowthres']) if args['lowthres'] else 0
high = float(args['highthres']) if args['highthres'] else 1
factor = float(args['factor']) if args['factor'] else 1

filter_branch(a_tree,
              unit_length=args['unit'],
              low_percentile=low,
              high_percentile=high,
              factor=factor)

a_tree.write_to_path(outfile, "newick")
Exemple #26
0
#! /usr/bin/env python

from dendropy import Tree
from decompose_lib import decompose_by_diameter, compute_group_distance_matrix, place_group_onto_tree
import sys
import os
from pasta import get_logger

_LOG = get_logger(__name__)

# Positional arguments: input tree, grouping table, nleaf file, output file.
# NOTE(review): nleaf_file is read from argv but not used in the visible code.
intree_file = sys.argv[1]
grouping_file = sys.argv[2]
nleaf_file = sys.argv[3]
distance_file = sys.argv[4]

t = Tree.get_from_path(intree_file, 'newick')

# Each grouping line is "<name> <taxon>"; the mapping is taxon -> name.
grouping = {}
with open(grouping_file, 'r') as f:
    for line in f:
        name, taxon = line.split()
        grouping[taxon] = name
_LOG.info('computing treeMap ... ')
treeMap = place_group_onto_tree(t, grouping)

# D is iterated as (A, B) key pairs and indexed by the same pair.
D = compute_group_distance_matrix(t, treeMap)

# One output line per pair: "A B distance".
with open(distance_file, 'w') as f:
    for A, B in D:
        f.write(A + " " + B + " " + str(D[(A, B)]) + "\n")
Exemple #27
0
 def open_tree(self, treefile):
     """Read the newick tree in *treefile* into ``self.sim_tree`` and reroot it at its midpoint."""
     loaded = Tree.get_from_path(treefile, schema="newick")  # , as_rooted=True)
     self.sim_tree = loaded
     loaded.reroot_at_midpoint()
Exemple #28
0
def get_taxa(tree_file, scheme='newick'):
    """Return the taxon labels of all leaves of the tree in *tree_file*.

    Underscores in labels are preserved when the file is read.
    """
    parsed = Tree.get_from_path(tree_file, scheme, preserve_underscores=True)
    labels = []
    for leaf in parsed.leaf_nodes():
        labels.append(leaf.taxon.label)
    return labels
Exemple #29
0
#! /usr/bin/env python

import logdate
from logdate.logD_lib import random_timetree
from dendropy import Tree
import dendropy
#import treeswift
from logdate.tree_lib import tree_as_newick
import argparse
from sys import argv,stdout

# Script: generate random time trees from an input tree and write them to a
# file, or to stdout when no output file is given.
parser = argparse.ArgumentParser()

parser.add_argument("-i","--input",required=True,help="Input tree")
parser.add_argument("-t","--samplingTime",required=False,help="Sampling time at leaf nodes. Default: None")
parser.add_argument("-p","--rep",required=False,help="The number of random replicates. Default: 1")
parser.add_argument("-s","--rseed",required=False,help="Random seed. Default: randomly chosen and will be reported")
parser.add_argument("-o","--output",required=False,help="Output file. Default: None. The trees will be printed to screen")

args = vars(parser.parse_args())

tree = Tree.get_from_path(args["input"],'newick',preserve_underscores=True)
sampling_time = args["samplingTime"]
nrep = int(args["rep"]) if args["rep"] else 1
randseed = int(args["rseed"]) if args["rseed"] else None
fout = open(args["output"],'w') if args["output"] is not None else stdout

try:
    random_timetree(tree,sampling_time,nrep,seed=randseed,fout=fout)
finally:
    # Close only a file this script opened; stdout is left alone.
    if fout is not stdout:
        fout.close()
Exemple #30
0
from calibration_lib import calibrate_tree
from sys import argv
from dendropy import Tree

# Script: calibrate the tree in argv[1] and write it to argv[2].
treefile = argv[1]
outfile = argv[2]

myTree = Tree.get_from_path(treefile, 'newick')

print("Read tree successfully")

# Prints whatever calibrate_tree returns; the tree object passed in is the
# one written out below.
print(calibrate_tree(myTree, verbose=True))

myTree.write_to_path(outfile, "newick")
Exemple #31
0
        # print(node.taxon)
        if not node.is_leaf():
            node.index = s
            node.label = str(node.index)
            s += 1
        else:
            for idx, name in enumerate(dna):
                # print(idx , str(name) , str(node.taxon))
                if str(name) == str(node.taxon):
                    node.index = idx
                    node.label = str(node.index)
                    break


# Script setup: load the RAxML tree and the whole-genome alignment, build
# the GTR model and index the tree nodes.
tree_path = '/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/ShortDataset/RAxML_bestTree.tree'
tree = Tree.get_from_path(tree_path, 'newick')
# Read the alignment inside a context manager so the FASTA handle is
# closed instead of leaked.
with open("/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/ShortDataset/wholegenome.fasta") as fasta_file:
    alignment = dendropy.DnaCharacterMatrix.get(file=fasta_file, schema="fasta")


print(tree.as_ascii_plot())

# Arguments for myPhylo.GTR_model: five rates and four pi values.
pi = [0.2184,0.2606,0.3265,0.1946]
rates = [0.975070 ,4.088451 ,0.991465 ,0.640018 ,3.840919 ]
GTR_sample = myPhylo.GTR_model(rates,pi)

column = myPhylo.get_DNA_fromAlignment(alignment)
dna = column[0]
setup_indexes(tree,alignment)
tips = len(dna)

Exemple #32
0
#! /usr/bin/env python

from sys import argv
from dendropy import Tree

# Script: count leaves per phylum and label the internal nodes of a tree.
annoFile = argv[1]  # ~/10kBacGenome/repophlan_microbes_ranks.txt
treefile = argv[2]

myTree = Tree.get_from_path(treefile, "newick")
nameHash = {}  # species name (column 0) -> phylum (column 2)
global_phylCount = {}  # phylum -> number of leaves in the tree

with open(annoFile, 'r') as f:
    for line in f:
        fields = line.split()
        name = fields[0]
        phylum = fields[2]
        nameHash[name] = phylum
        #global_phylCount[phylum] = 1 + (global_phylCount[phylum] if phylum in global_phylCount else 0)

# count the number of species in each phylum
# A leaf label missing from the annotation file raises KeyError here.
for node in myTree.leaf_node_iter():
    phylum = nameHash[node.taxon.label]
    global_phylCount[phylum] = 1 + (global_phylCount[phylum]
                                    if phylum in global_phylCount else 0)

# label internal nodes
# NOTE(review): ID is never incremented in the visible code, so every
# internal node gets the label "I_0" -- confirm against the full script.
ID = 0
for node in myTree.preorder_node_iter():
    if not node.is_leaf():
        node.label = "I_" + str(ID)
Exemple #33
0
#! /usr/bin/env python

import re
from dendropy import Tree
from sys import argv

# Script: print count, maximum, sum and average of the branch lengths of
# the newick tree in argv[1].
filename = argv[1]

a_tree = Tree.get_from_path(filename, 'newick')
br_sum = 0
br_count = 0
br_max = -1.0  # stays -1.0 if no edge has a length

# Edges without a length are skipped entirely.
for edge in a_tree.preorder_edge_iter():
    if edge.length is not None:
        br_count += 1
        br_sum += edge.length
        if edge.length > br_max:
            br_max = edge.length

# NOTE(review): raises ZeroDivisionError when no edge has a length.
br_avg = br_sum / br_count

print("branch #: " + str(br_count))
print("branch max: " + str(br_max))
print("branch sum: " + str(br_sum))
print("branch avg: " + str(br_avg))
def compare_trees(tree_filename1, tree_filename2):
    """Return the symmetric difference between two newick tree files.

    Both trees are placed in one ``TreeList`` before being compared.
    """
    from dendropy import Tree, TreeList
    from dendropy.treecalc import symmetric_difference

    def load(tree_path):
        return Tree.get_from_path(tree_path, 'newick')

    first = load(tree_filename1)
    second = load(tree_filename2)
    pair = TreeList([first, second])
    return symmetric_difference(pair[0], pair[1])
Exemple #35
0
# Script: for every "*.nwk" file one directory below --preproot, remove
# dots from leaf labels and write the tree back to the same file.
argparser = argparse.ArgumentParser()

argparser.add_argument('--preproot', metavar='tree_root', type=str, required=True)


args = argparser.parse_args()

for infile in glob(path.join(args.preproot, "*", "*.nwk")):

    # Call form prints the same text under Python 2 and Python 3.
    print(infile)

    basename = path.basename(infile).partition('.')[0]
    prefix = basename.partition('_')[0][:2]


    tree = Tree.get_from_path(infile, 'newick', preserve_underscores=True)

    for node in tree:
        if node.is_leaf():
            if "." in node.taxon.label:
                node.taxon.label = node.taxon.label.replace(".", "")

    # Overwrite the file in place. The context manager closes it; the
    # original `tree_file.close` lacked parentheses and never ran.
    with open(infile, "r+") as tree_file:
        tree_file.seek(0)
        tree_file.write(tree.as_string('newick'))
        tree_file.truncate()

    # remove quotes
    tree_file = open(infile, "r+")
Exemple #36
0
 def __get_tree(self,taxon_set):
     """Read the "RAxML_result.<param_names>" newick tree found under self.path, using *taxon_set*."""
     return Tree.get_from_path(self.path+"/RAxML_result."+self.param_names,
                               'newick',
                               encode_splits=True,
                               taxon_set=taxon_set)
Exemple #37
0
    except (AttributeError, KeyError):
        out.write(label)
    

    if sel is not None:
        s = ""
        try:
            s = float(sel)
            s = str(s)
        except ValueError:
            s = str(sel)
        if s:
            out.write(":%s" % s)

if __name__ == "__main__":
    #test
    # Manual test: read the tree in argv[1], tag nodes with their preorder
    # index, set node ages, ladderize, and dump leaves/nodes to CSV files.
    import sys
    from dendropy import Tree
    from collections import OrderedDict
    # Attach this module's functions to Tree so they can be called as methods.
    Tree.write_preorder_to_csv = write_preorder_to_csv
    Tree.set_node_ages = set_node_ages
    t = Tree.get_from_path(sys.argv[1], schema="newick", suppress_internal_node_taxa=True, suppress_leaf_node_taxa=True)
    for i, nd in enumerate(t.preorder_node_iter()):
        nd.data={'preorder_index':i}
    t.set_node_ages()
    #print(t.find_node_with_label("Primates").data)
    t.ladderize(ascending=True)
    with open('test_leaves.csv', 'w+') as l, open('test_nodes.csv', 'w+') as n:
        # Maps the key 'preorder index' to the list ['preorder_index'].
        node_extras=OrderedDict()
        node_extras['preorder index']=['preorder_index']
        t.write_preorder_to_csv(l,{},n,node_extras,-1)
Exemple #38
0
    def topology_counter(self, rooted=False, outgroup=None):
        """
        Count how often each distinct topology occurs among the
        "Topology_bestTree" files in the "Topologies" folder.

        Two trees are the same topology when their unweighted Robinson
        Foulds distance is 0. When rooted is truthy, each tree is first
        moved to the outgroup position of the node labelled outgroup.

        Output:
        topologies_to_counts --- a dictionary mapping topologies to the number of times they appear
        unique_topologies_to_newicks --- a dictionary mapping each unique
            topology to the set of newick strings matching it
        """
        namespace = dendropy.TaxonNamespace()
        topology_dir = "Topologies"

        newicks_by_topology = {}
        known_topologies = set([])
        counts = defaultdict(int)

        for entry in os.listdir(topology_dir):
            # Only files whose name without extension is "Topology_bestTree".
            if os.path.splitext(entry)[0] != "Topology_bestTree":
                continue

            candidate = Tree.get_from_path(os.path.join(topology_dir, entry),
                                           'newick',
                                           taxon_namespace=namespace)

            if rooted:
                outgroup_node = candidate.find_node_with_taxon_label(outgroup)
                candidate.to_outgroup_position(outgroup_node,
                                               update_bipartitions=False)

            # Find the first known topology at RF distance 0, if any.
            matched_topology = None
            for known in known_topologies:
                known_tree = Tree.get_from_string(known,
                                                  'newick',
                                                  taxon_namespace=namespace)
                distance = treecompare.unweighted_robinson_foulds_distance(
                    known_tree, candidate)
                if distance == 0:
                    matched_topology = known
                    break

            if matched_topology is not None:
                counts[matched_topology] += 1
                candidate_newick = candidate.as_string("newick").replace(
                    "\n", "")
                newicks_by_topology[matched_topology].add(candidate_newick)
            else:
                candidate_newick = candidate.as_string("newick").replace(
                    "\n", "")
                known_topologies.add(candidate_newick)
                counts[candidate_newick] += 1
                newicks_by_topology[candidate_newick] = set([candidate_newick])

        return counts, newicks_by_topology
Exemple #39
0
import numpy as np
import numpy.linalg as la
from dendropy import Tree, DnaCharacterMatrix
import myPhylo

# Example script: load a RAxML tree plus its whole-genome alignment and
# evaluate likelihoods under a GTR model built by the project-local myPhylo module.
tree_path = '/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/exampledataset/exampledataset_RAxML_bestTree'
tree = Tree.get_from_path(tree_path, 'newick')

# Parse the alignment inside a context manager so the FASTA file handle is
# closed as soon as DendroPy has finished reading it (the previous
# `file=open(...)` form never closed the handle).
with open(
        "/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/exampledataset/wholegenome.fasta"
) as fasta_file:
    alignment = DnaCharacterMatrix.get(file=fasta_file, schema="fasta")

# Second tree loaded from the "RerootTree_node12" file; not used further in
# the visible part of this script.
tree2 = Tree.get_from_path(
    '/home/nehleh/Documents/0_Research/PhD/Data/simulationdata/recombination/exampledataset/RerootTree_node12',
    'newick')

# Parameters handed to myPhylo.GTR_model.
# NOTE(review): presumably `pi` holds the four base frequencies and `rates`
# the GTR exchange rates -- confirm against myPhylo.GTR_model.
pi = [0.317, 0.183, 0.367, 0.133]
rates = [0.000100, 0.636612, 2.547706, 0.000100, 2.151395]
GTR_sample = myPhylo.GTR_model(rates, pi)

# Take the first element returned by get_DNA_fromAlignment and attach it to
# the tree via myPhylo.set_index.
column = myPhylo.get_DNA_fromAlignment(alignment)
dna = column[0]
myPhylo.set_index(tree, dna)

print("Original tree:::::::::::::::")
print(tree.as_string(schema='newick'))
print(tree.as_ascii_plot())

# Likelihood for the single `dna` entry, then for the whole alignment.
LL_normal = myPhylo.computelikelihood(tree, dna, GTR_sample)
W_LL_normal = myPhylo.wholeAlignmentLikelihood(tree, alignment, GTR_sample)
Exemple #40
0
        col = ""
        for t in range(tips):
            col += str(alignment[t][l])
        # LL_vector.append(computelikelihood(tree, col, model))
        LL_vector[:, l] = computelikelihood(tree, col, model)
    return LL_vector


#=======================================================================================================================

# Module-level parameters for the run below.
# NOTE(review): presumably `pi` are base frequencies and `rates` GTR exchange
# rates; the meaning of `f` is not visible from here -- confirm.
pi = [0.2184, 0.2606, 0.3265, 0.1946]
rates = [2.0431, 0.0821, 0, 0.067, 0]
f = 1

tree = Tree.get_from_path(
    '/home/nehleh/0_Research/PhD/Data/simulationdata/recombination/500000/RAxML_bestTree.wholegenometree',
    'newick')

# Read the alignment through a context manager so the FASTA handle is closed
# once parsing finishes (the previous `file=open(...)` form leaked it).
# The handle is deliberately NOT named `f`, which is a live variable above.
with open(
        "/home/nehleh/0_Research/PhD/Data/simulationdata/recombination/500000/wholegenome.fasta"
) as fasta_handle:
    alignment_GTR = dendropy.DnaCharacterMatrix.get(file=fasta_handle,
                                                    schema="fasta")
# alignment_JC = dendropy.DnaCharacterMatrix.get(file=open("/home/nehleh/0_Research/PhD/Data/LL_vector/JC69_100.fasta"), schema="fasta")

# Number of sequences in the alignment and the length of each sequence.
tips = len(alignment_GTR)
alignment_len = alignment_GTR.sequence_size

# GTRGTRvector = []
# JCJCvector = []
# GTRJCvector = []
# JCGTRvector = []
# Remaining command-line options; `parser` itself is created earlier in the script.
parser.add_argument(
    "-N", "--population", required=True, help="Population size")
parser.add_argument(
    "-g", "--growth", required=False,
    help="Growing rate (exponential) of the population. Default: 0")
parser.add_argument(
    "-o", "--outputFile", required=False,
    help="The name of the output tree. Default: stdout")

# Work with the parsed options as a plain dictionary.
args = vars(parser.parse_args())

infile = args["inputFile"]
# A missing or empty output name means "write to stdout" (represented as None).
outfile = args["outputFile"] or None

N = float(args["population"])
# The growth rate is optional and falls back to 0 when it is not supplied.
if args["growth"]:
    alpha = int(args["growth"])
else:
    alpha = 0

myTree = Tree.get_from_path(infile, 'newick')

# The return value is ignored; the same tree object is written out below.
simulateTreeFromTopology(myTree, N, alpha)

if outfile is None:
    stdout.write(myTree.as_string('newick'))
else:
    myTree.write_to_path(outfile, 'newick')
Exemple #42
0
import dendropy
from dendropy import TreeList,Tree,Taxon,Node
import sys
import argparse
import re

# Collapse every per-species subtree of a Newick tree into a single leaf named
# after the species, then write the reduced tree.

# Raw string: "\d" must reach argparse verbatim; in a normal string literal it
# is an invalid escape sequence.
parser = argparse.ArgumentParser(description=r"Parses a Newick tree file and writes another with subtrees formed by the same species collapsed. It assumes that all samples for each species form a monophyletic group. Leave names are expected to follow the scheme species_\d+_\d+")
parser.add_argument("-i", type=str, default="infile.tree", required=True, help="Input Newick tree file")
parser.add_argument("-o", type=str, default="outtree.tree", required=False, help="Output Newick tree file")
args = parser.parse_args()

tree = Tree.get_from_path(args.i, schema="newick", rooting="force-unrooted")
namespace = tree.taxon_namespace
labels = namespace.labels()

# A label is split into "<species> <field> <field>"; the greedy first group
# keeps everything before the last two space-separated fields.
# NOTE(review): the help text documents underscores while the pattern uses
# spaces -- presumably DendroPy turns unquoted underscores into spaces; confirm.
regex = re.compile(r"(.+) .+ .+")

# Group the leaf labels by species in a single pass. Grouping on the captured
# species name (instead of re-matching "<species> .+ .+" for every species)
# avoids two problems: regex metacharacters inside a species name, and a
# species whose name is a prefix of another one swallowing its leaves.
leaves_by_species = {}
for label in labels:
    match = regex.match(label)
    if match:
        leaves_by_species.setdefault(match.group(1), []).append(label)

species = list(leaves_by_species)
species_set = set(species)
newNamespace = dendropy.datamodel.taxonmodel.TaxonNamespace()

for specie, leaves in leaves_by_species.items():
    # Turn the most recent common ancestor of all samples of this species into
    # a leaf by dropping its children, and label it with the species name.
    mrca_node = tree.mrca(taxon_labels=leaves)
    del mrca_node._child_nodes[:]
    taxon = Taxon(specie)
    mrca_node.taxon = taxon
    newNamespace.add_taxon(taxon)

# Swap in the namespace that only contains the new per-species taxa.
tree.taxon_namespace = newNamespace
tree.write(path=args.o, schema="newick", suppress_rooting=True)
Exemple #43
0
    def windows_to_newick(self,
                          top_topologies_to_counts,
                          unique_topologies_to_newicks,
                          rooted=False,
                          outgroup=None):
        """
        Creates a dictionary of window numbers to the topology of that window if
        the newick string contained in the window is a top topology; otherwise the
        window number is mapped to "Other".

        Only files in the "Topologies" directory whose name (without extension)
        is "Topology_bestTree" are considered; the window number is taken from
        the file extension.

        Input:
        top_topologies_to_counts -- a mapping whose keys are the top topologies
        unique_topologies_to_newicks -- a mapping outputted by topology_counter()
        rooted -- if True, each window tree is moved to the outgroup position
                  before its newick string is compared
        outgroup -- taxon label of the outgroup; only used when rooted is True

        Returns:
        wins_to_tops --- a dictionary as described above
        tops_list --- a list of the top topologies, with "Other" appended when
                      at least one window does not match a top topology
        """

        # Copy the keys into a real list: on Python 3 dict.keys() is a view
        # without append(), so adding "Other" below would raise AttributeError.
        tops_list = list(top_topologies_to_counts.keys())
        wins_to_tops = {}

        # Iterate over each file in the "Topologies" directory in natural order
        for filename in natsorted(os.listdir("Topologies")):

            # Skip everything that is not a best-tree topology file
            if os.path.splitext(filename)[0] != "Topology_bestTree":
                continue

            filename = os.path.join("Topologies", filename)

            # Read the newick string from the first line of the file
            # NOTE(review): in the unrooted case this raw line (including any
            # trailing newline) is what gets compared against the stored
            # newick strings -- confirm both sides use the same formatting.
            with open(filename) as f:
                newick = f.readline()

            if rooted:
                # taxon names
                tns = dendropy.TaxonNamespace()

                # Create the tree, root it at the outgroup and regenerate the
                # newick string from the rooted tree
                new_tree = Tree.get_from_path(filename,
                                              'newick',
                                              taxon_namespace=tns)
                outgroup_node = new_tree.find_node_with_taxon_label(outgroup)
                new_tree.to_outgroup_position(outgroup_node,
                                              update_bipartitions=False)
                newick = new_tree.as_string("newick").replace("\n", "")

            # The file extension (without the dot) is the window number
            window_number = int(
                (os.path.splitext(filename)[1]).replace(".", ""))

            for unique_topology in unique_topologies_to_newicks:

                # Only the unique topology whose newick set contains this
                # window's newick string is relevant
                if newick not in unique_topologies_to_newicks[unique_topology]:
                    continue

                # If the unique topology is a top topology map to it
                if unique_topology in tops_list:
                    wins_to_tops[window_number] = unique_topology

                # Otherwise map to "Other"
                else:
                    wins_to_tops[window_number] = "Other"

                    if "Other" not in tops_list:
                        # Adds "Other" so all topologies are included with top ones
                        tops_list.append("Other")

        return wins_to_tops, tops_list
#! /usr/bin/env python

from dendropy import Tree
from decompose_lib import decompose_by_diameter, compute_group_distance_matrix,place_group_onto_tree 
import sys
import os
from pasta import get_logger
_LOG = get_logger(__name__)


# Positional command-line arguments: input tree, grouping table,
# leaf-count file (read here but not used below) and output distance file.
intree_file = sys.argv[1]
grouping_file = sys.argv[2]
nleaf_file = sys.argv[3]
distance_file = sys.argv[4]

t = Tree.get_from_path(intree_file, 'newick')

# Every line of the grouping file holds two whitespace-separated fields,
# "<name> <taxon>"; build the taxon -> name lookup from them.
with open(grouping_file, 'r') as f:
    grouping = {taxon: name for name, taxon in (line.split() for line in f)}
_LOG.info('computing treeMap ... ')
treeMap = place_group_onto_tree(t, grouping)

D = compute_group_distance_matrix(t, treeMap)


# One output line per (A, B) key of D: "<A> <B> <value>".
with open(distance_file, 'w') as f:
    for A, B in D:
        f.write(" ".join([A, B, str(D[(A, B)])]) + "\n")
import dendropy
from dendropy.calculate import treecompare
from dendropy import Tree
import os

# Collect every entry of "output" that has "nex" as one of its dot-separated
# name components.
protein_dir_set = [
    str(entry) for entry in os.listdir('output')
    if "nex" in entry.split(".")
]

# One taxon namespace is shared by all trees that get compared.
tns = dendropy.TaxonNamespace()

# Visit every unordered pair of files exactly once and print their
# unweighted Robinson-Foulds distance.
for first_index, first_name in enumerate(protein_dir_set):
    for second_name in protein_dir_set[first_index + 1:]:
        tree1 = Tree.get_from_path("output/" + first_name,
                                   "nexus",
                                   taxon_namespace=tns)
        tree2 = Tree.get_from_path("output/" + second_name,
                                   "nexus",
                                   taxon_namespace=tns)

        tree1.encode_bipartitions()
        tree2.encode_bipartitions()

        distance = treecompare.unweighted_robinson_foulds_distance(
            tree1, tree2)
        print(first_name, second_name, distance)
def reroot(treename, outgroup_label="Vampyroteuthis infernalis"):
  """Reroot a NEXUS tree at the parent of an outgroup taxon and save it.

  Reads "work/<treename>.nex", reroots the tree at the parent node of the
  leaf labelled `outgroup_label`, ladderizes it and writes the result to
  "work/<treename>.rooted.nex".

  Args:
    treename: base name of the tree file inside "work/", without ".nex".
    outgroup_label: taxon label of the outgroup; defaults to the label that
      used to be hard-coded, so existing callers behave as before.

  Raises:
    ValueError: if no node carries the taxon label `outgroup_label`.
  """
  tree = Tree.get_from_path("work/" + treename + ".nex", "nexus")

  # Fail with a clear message instead of an AttributeError on
  # `None.parent_node` when the outgroup is missing from the tree.
  outgroup_node = tree.find_node_with_taxon_label(outgroup_label)
  if outgroup_node is None:
    raise ValueError(
        "taxon label %r not found in tree %r" % (outgroup_label, treename))

  tree.reroot_at_node(outgroup_node.parent_node)
  tree.ladderize()
  tree.write_to_path("work/" + treename + ".rooted.nex", "nexus")