Exemple #1
0
def get_sum_of_branches(treepath):
    """Return the total branch length of the Newick tree stored in a file.

    Only the first line of ``treepath`` is read and parsed as a Newick
    string; the value returned is whatever ``Tree.length()`` reports for
    the parsed tree.
    """
    # The context manager releases the handle even if reading raises.
    with open(treepath, "r") as fin:
        newick = fin.readline().strip()
    t = Tree()
    t.read_from_string(newick, "newick")
    return t.length()
Exemple #2
0
 def __init__(self, **kwargs):
     '''
     Load the tree to be re-rooted and, optionally, a reference tree.

     Parameters
     ----------
     reference_tree_path: str
         Path to the file containing the reference tree, which is used to
         re-root the tree provided through tree_path. Optional; when it is
         omitted (or falsy) the value is stored unchanged in
         self.reference_tree and no file is read.
     tree_path: str
         Path to the file containing the tree to be re-rooted. This tree will
         be re-rooted at the same position as the tree provided through
         reference_tree_path. Required (a KeyError is raised when missing).

     Any other keyword argument results in an Exception once both trees
     have been loaded.
     '''
     tree_path = kwargs.pop('tree_path')
     reference_tree_path = kwargs.pop('reference_tree_path', None)

     old_tree_message = "Importing old tree from file: %s" % tree_path
     logging.debug(old_tree_message)
     self.tree = Tree.get(path=tree_path, schema='newick')

     if not reference_tree_path:
         # Keep whatever falsy value the caller supplied (None by default).
         self.reference_tree = reference_tree_path
     else:
         reference_message = ("Importing reference tree from file: %s"
                              % reference_tree_path)
         logging.debug(reference_message)
         self.reference_tree = Tree.get(path=reference_tree_path,
                                        schema='newick')

     # Everything still left in kwargs was not recognised above.
     if kwargs:
         raise Exception("Unexpected arguments provided to Decorator class: %s" % kwargs)
Exemple #3
0
    def __bisect__(t,e):
        """Split tree ``t`` in two by cutting edge ``e``.

        The head node of ``e`` is detached from its parent and becomes the
        seed node of a new ``Tree`` (``t1``). If the former parent is left
        with exactly one child, that parent is spliced out and the child
        inherits the sum of both edge lengths.

        Returns the pair ``(t, t1)``.

        NOTE(review): on the early-return path (the former parent was the
        seed node) neither ``__updateNode__`` is called nor ``annotated``
        set on either tree -- confirm this is intended.
        """
        # The caller supplies the edge; it used to be computed here via
        # __find_centroid_edge__(t).

        u = e.tail_node
        v = e.head_node

        # Detach the subtree below e and wrap it in its own tree.
        u.remove_child(v)
        t1 = Tree(seed_node = v)

        # u is now a unifurcation: remove it and reconnect its single child.
        if u.num_child_nodes() == 1:
            p = u.parent_node
            v = u.child_nodes()[0]
            l_v = v.edge_length
            u.remove_child(v)
            if p is None: # u is the seed_node; this means the tree runs out of all but one side
                t.seed_node = v
                return t,t1
            l_u = u.edge_length
            p.remove_child(u)
            p.add_child(v)
            # The merged edge spans both of the edges it replaces.
            v.edge_length = l_u+l_v
            u = p

        # Refresh every node on the path from the cut point up to the root.
        while u is not None:
            __updateNode__(u)
            u = u.parent_node

        t.annotated = True
        t1.annotated = True

        return t,t1
Exemple #4
0
def generate_ATT_from_files(seqaln,
                            mattype,
                            workdir,
                            treefile,
                            otu_json,
                            ingroup_mrca=None):
    """Build an ATT object without phylesystem.

    Parameters:
        seqaln: path to the DNA alignment file.
        mattype: DendroPy schema name of the alignment file.
        workdir: working directory handed on to ``AlignTreeTax``.
        treefile: path to a Newick tree sharing the alignment's taxa.
        otu_json: path to a JSON file mapping taxon labels to OTU records.
        ingroup_mrca: optional ingroup MRCA ott_id. If none is provided, the
            MRCA is calculated from the '^ot:ottId' of every OTU record.

    Returns the ``AlignTreeTax`` built from the tree, the OTU dictionary and
    the alignment.
    """
    aln = DnaCharacterMatrix.get(path=seqaln, schema=mattype)
    for tax in aln.taxon_namespace:
        tax.label = tax.label.replace(" ", "_") #Forcing all spaces to underscore UGH
    with open(otu_json) as data_file:
        otu_dict = json.load(data_file)
    for tax in aln:
        assert tax.label in otu_dict, \
            "taxon %r from the alignment is missing from %s" % (tax.label, otu_json)
    # The tree only needs to be parsed once; it shares the alignment's
    # taxon namespace so labels line up.
    tre = Tree.get(path=treefile,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)
    otu_newick = tre.as_string(schema="newick")
    if ingroup_mrca:
        ott_mrca = int(ingroup_mrca)
    else:
        # dict.get must be *called*; subscripting the bound method raised
        # TypeError whenever no ingroup MRCA was supplied.
        ott_ids = [otu.get('^ot:ottId') for otu in otu_dict.values()]
        ott_mrca = get_mrca_ott(ott_ids)
    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir)
Exemple #5
0
 def get_subtree(self, taxa):
     """Return a PhylogeneticTree built from a pruned copy of this tree.

     ``taxa`` is a sequence of either label strings or Taxon objects; the
     type of its first element selects which DendroPy prune call is used.
     Returns None when ``taxa`` is empty.

     NOTE(review): the copy is passed to ``prune_taxa*``, so the listed
     taxa are the ones handed to the pruning call -- confirm this matches
     what callers expect from a method named ``get_subtree``.
     """
     if not len(taxa):
         return None
     pruned = Tree(self._tree)
     first = taxa[0]
     if isinstance(first, str):
         pruned.prune_taxa_with_labels(taxa)
     elif isinstance(first, Taxon):
         pruned.prune_taxa(taxa)
     return PhylogeneticTree(pruned)
Exemple #6
0
def scale_tree_branch(tree, format="newick"):
    """Return a Newick string for ``tree``, rescaling long branches.

    ``tree`` may be a ``Tree`` instance, a path to an existing tree file,
    or a string holding the tree itself; ``format`` is the schema used
    when the tree has to be parsed. If any edge is longer than 1, every
    edge length is divided by 100. Edges without a length are ignored.

    Raises TypeError when ``tree`` is none of the supported input kinds.
    """
    # Check for a Tree instance first: os.path.exists() must not be called
    # on a Tree, and the instance (not the Tree class) has to be used.
    if isinstance(tree, Tree):
        tree_obj = tree
    elif os.path.exists(tree):
        tree_obj = Tree.get_from_path(tree, format)
    elif isinstance(tree, str):
        tree_obj = Tree(stream=StringIO(tree), schema=format)
    else:
        raise TypeError("tree must be a Tree, a file path or a tree string")

    # Skip edges without a length (e.g. the root edge); comparing None
    # with a number raises TypeError on Python 3.
    measured_edges = [e for e in tree_obj.postorder_edge_iter()
                      if e.length is not None]
    if any(e.length > 1 for e in measured_edges):
        for e in measured_edges:
            e.length = e.length / 100.0
    return tree_obj.as_newick_string()
Exemple #7
0
 def test_bootstraps_in_annotated_tree_alongside_empty_taxa(self):
     '''Leaves a and b are expected to get no taxonomy, c and d get ['tax'].'''
     tree = Tree.get(data="(a,(b,(c,d:0.2)'0.2:tax')0.01973:0.9)root;", schema='newick')
     observed = TaxonomyExtractor().taxonomy_from_annotated_tree(tree)
     # assertEqual replaces the assertEquals alias, which was removed in
     # Python 3.12.
     self.assertEqual({u'a': [],
                       u'b': [],
                       u'c': ['tax'],
                       u'd': ['tax']},
                      observed)
Exemple #8
0
 def __init__(self, workDir, resultsFile, inFile, coreId, seqType, seedNum, bootNum, method, interLeaved):
     '''
     Store the bootstrap run settings on the instance.

     Data Fields:
     work_dir    = temporary directory
     resultsFile = stored as given
     inFile      = data file
     coreId      = core id (int); also used to name the output file
     seqType     = d (dna); p (protein); r (rna)
     bootNum     = number of replicates (also kept as self.n)
     seedNum     = random number seed between 1 and 32767
     method      = b (Bootstrap) Default,
                   j (Jackknife)
                   c (Permute species for each character)
                   o (Permute character order)
                   s (Permute within species)
                   r (Rewrite data)
     interLeaved = True if sequence data is interleaved otherwise False
     '''
     # Settings taken over unchanged from the caller.
     self.work_dir = workDir
     self.resultsFile = resultsFile
     self.inFile = inFile
     self.coreId = coreId
     self.seqType = seqType
     self.seedNum = seedNum
     self.bootNum = bootNum
     self.method = method
     self.interLeaved = interLeaved

     # Derived values.
     self.n = bootNum
     self.outFile = "".join(["bootstrap_", str(coreId), ".out"])
     # nt is True for the rna ('r') and dna ('d') sequence types only.
     self.nt = self.seqType in ['r', 'd']

     # Containers that start out empty.
     self.newSpeciesTree = Tree()
     self.leafLabelStree = []
     self.internalExRootSpeceLabels = []
def readTreeFromFile(treePath):
    '''
    Parse the newick tree stored in the file at treePath.

    input: path to the file containing newick tree
    return Tree object (comment metadata is extracted and annotations are
    kept, as requested through the keyword arguments below)
    '''
    return Tree.get_from_path(
        treePath,
        'newick',
        annotations_as_nhx=True,
        extract_comment_metadata=True,
        suppress_annotations=False,
    )
def get_bls(tree_path):
    """Return the branch lengths of the newick tree stored at ``tree_path``.

    Edges are visited in level order; edges without a length are skipped,
    so the result holds branch lengths only.
    """
    t = Tree()
    t.read_from_path(tree_path, "newick")

    # A plain for-loop works on both Python 2 and 3 (the manual
    # iterator.next() call was Python-2 only) and no longer shadows the
    # builtin len().
    return [e.length for e in t.level_order_edge_iter() if e.length is not None]
def main():
    """Run phylobayes-mpi on an alignment/tree pair and parse its output.

    Command line arguments:
        1: number of CPUs handed to mpirun
        2: job (chain) name
        3: alignment file
        4: tree file

    The tree is rewritten to ``temp.tre`` before ``pb_mpi`` is started, and
    ``readpb_mpi`` is run afterwards. Both external calls are asserted to
    exit with status 0.
    """

    cpu = sys.argv[1]
    job_name = sys.argv[2]

    # Only a missing positional argument is expected here, so catch just
    # IndexError instead of swallowing every exception.
    # NOTE(review): `restart` is not defined in this function -- confirm it
    # exists at module level, otherwise these branches raise NameError.
    try:
        alnfile = sys.argv[3]
    except IndexError:
        assert(restart is True), "Specified alignment file does not exist. Path?"
    try:
        treefile = sys.argv[4]
    except IndexError:
        assert(restart is True), "Specified tree file does not exist. Path?"

    # Rewrite tree to create trifurcating root, as needed by phylobayes mpi
    tree = Tree.get_from_path(treefile, "newick", rooting = "force-unrooted")
    tree.resolve_polytomies() # in case of polytomies.
    tree.update_bipartitions() # this will create a trifurcating root on an unrooted tree
    tstring = str(tree).replace('[&U] ', '')
    with open('temp.tre', 'w') as tf:
        tf.write(tstring + ';\n')

    # Phylobayes is run to chain length 5500, sampling every 5 to yield 1100. Later, burnin of 100 is removed to get a final posterior n=1000 (same procedure as Rodrigue 2013 Genetics)
    # NOTE(review): the command lines are assembled from argv and run through
    # the shell; arguments containing spaces or shell metacharacters will be
    # misinterpreted.
    pb_call = "mpirun -np " + str(cpu) + " ./pb_mpi -mutsel -cat -d " + alnfile + " -T temp.tre -x 5 1100 " + job_name

    run_pb_call = subprocess.call(pb_call, shell = True)
    assert( run_pb_call == 0 ), "pb_mpi didn't run!"

    # Parse output with readpb_mpi, using a burnin of 100 and saving everything else (posterior size = 1000)
    readpb_call = "mpirun -np " + str(cpu) + " ./readpb_mpi -x 100 1 -1 " + job_name + "\n"
    run_readpb_call = subprocess.call(readpb_call, shell = True)
    assert( run_readpb_call == 0 ), "readpb_mpi didn't run!"
def get_tree_lines(Tname):
    """Return the newick string of every child of every internal node.

    The tree is read from the newick file ``Tname``; internal nodes are
    visited in postorder and, for each, the newick strings of its children
    are appended in child order.
    """
    from dendropy import Tree
    tree = Tree.get_from_path(Tname, "newick")
    newick_strings = []
    for internal in tree.postorder_internal_node_iter():
        newick_strings.extend(
            child.as_newick_string() for child in internal.child_nodes()
        )
    return newick_strings
Exemple #13
0
def ete_to_dendropy(tree):
    """Convert ``tree`` into a DendroPy tree plus its character matrix.

    The character matrix comes from ``ete_to_dendropy_cm``; the DendroPy
    tree is parsed from ``tree.write(format=1)`` using the matrix's taxon
    namespace. Returns ``(dendro_tree, char_matrix)``.
    """
    from dendropy import Tree as DTree
    char_matrix = ete_to_dendropy_cm(tree)
    namespace = char_matrix.taxon_namespace
    newick = tree.write(format=1)
    return (
        DTree.get(data=newick, schema='newick', taxon_namespace=namespace),
        char_matrix,
    )
Exemple #14
0
    def bipartition_by_edge(self, e):
        """Prunes the subtree that attached to the head_node of edge e and returns them as a separate tree.

        Returns a pair ``(t1, t2)`` of PhylogeneticTree objects: ``t1`` is
        rooted at the former head node of ``e`` and ``t2`` at the root of
        what is left of the original tree. When edges carry a
        ``num_leaves_below`` attribute, the counts on the path from the cut
        point up to the root are reduced by the number of leaves in ``t1``.
        """

        t = self._tree
        nr = e.head_node
        assert e.tail_node is not None
        assert e.head_node is not None
        assert nr.parent_node is e.tail_node
        is_valid_tree(t)

        # NOTE(review): n is not used anywhere below in this method.
        n = self.n_leaves
        # The tail node may disappear when unifurcations are suppressed,
        # so remember it and its parent before cutting.
        potentially_deleted_nd = e.tail_node
        grandparent_nd = potentially_deleted_nd.parent_node
        e.tail_node.remove_child(nr, suppress_unifurcations=True)

        # Turn the detached node into the root of its own tree.
        nr.edge.length = None
        nr.parent_node = None
        convert_node_to_root_polytomy(nr)
        t1 = PhylogeneticTree(Tree(seed_node=nr))
        n1 = t1.n_leaves # temp we could speed this up, by telling the Phylogenetic tree how many leaves it has

        if hasattr(e, "num_leaves_below"):
            if grandparent_nd is None:
                # The tail node had no parent, so it is the root of the rest.
                old_root = potentially_deleted_nd
                if old_root.edge:
                    old_root.edge.num_leaves_below -= n1
            else:
                # Only adjust the tail node if it is still attached, i.e. it
                # was not removed as a unifurcation.
                if potentially_deleted_nd in grandparent_nd.child_nodes():
                    potentially_deleted_nd.edge.num_leaves_below -= n1
                old_root = grandparent_nd
                if old_root.edge:
                    old_root.edge.num_leaves_below -= n1
                # Walk up to the root, updating every edge on the way.
                while old_root.parent_node:
                    old_root = old_root.parent_node
                    if old_root.edge:
                        old_root.edge.num_leaves_below -= n1
        else:
            # No leaf counts to maintain: just locate the root.
            old_root = grandparent_nd or potentially_deleted_nd
            while old_root.parent_node:
                old_root = old_root.parent_node

        t2 = PhylogeneticTree(Tree(seed_node=old_root))

        is_valid_tree(t1._tree)
        is_valid_tree(t2._tree)
        return t1, t2
Exemple #15
0
 def readTreeFromString(self, treeString):
     '''
     Parse a newick tree held in a string.

     input: string containing newick tree
     return Tree object (comment metadata is extracted and annotations are
     kept, as requested through the keyword arguments below)
     '''
     # The empty Tree() that used to be built first was discarded
     # immediately, so the parsed tree is returned directly.
     return Tree.get_from_string(treeString, 'newick', annotations_as_nhx=True, extract_comment_metadata=True, suppress_annotations=False)
Exemple #16
0
def remove_internal_labels(strtree):
    """Return ``strtree`` with numeric node labels >= ``tips_number`` removed.

    The newick string is parsed, every labelled node whose label converts
    to an integer of at least ``tips_number`` (a name defined outside this
    function) has its label cleared, and the tree is serialised back to
    newick. A label that is not an integer literal makes ``int()`` raise
    ValueError.
    """
    tree = Tree.get_from_string(strtree, schema='newick')
    for node in tree.postorder_node_iter():
        if node.label is None:
            continue
        if int(node.label) >= tips_number:
            node.label = None

    return tree.as_string(schema="newick")
Exemple #17
0
def read_lsd_results(inputDir):
    """Build a start vector from the LSD output files found in ``inputDir``.

    Suppose LSD was run on "mytree.tre" and all the outputs are placed
    inside ``inputDir``: the log "mytree.tre.result" and the result tree
    "mytree.tre.result.newick".

    The rate ``mu`` is read from the log (the number following
    "Tree 1 rate "). For every non-root branch present in both trees with
    positive length in each, the entry of the returned list is the ratio
    result_length / input_length; all other branch entries stay at 1e-10.
    The list has 2*n-2 branch entries (n = number of leaves) followed by
    ``mu``.

    Raises ValueError when the rate cannot be found in the log file.

    NOTE(review): only digits and a single '.' are read, so a rate written
    in exponent notation would be truncated -- confirm LSD's log format.
    """
    log_file = normpath(join(inputDir, "mytree.tre.result"))
    input_tree_file = normpath(join(inputDir, "mytree.tre"))
    result_tree_file = normpath(join(inputDir, "mytree.tre.result.newick"))

    # Close the log file deterministically instead of leaking the handle.
    with open(log_file, 'r') as fin:
        s = fin.read()

    marker = "Tree 1 rate "
    start = s.find(marker)
    if start < 0:
        # find() returns -1 on failure; without this check parsing would
        # silently start at an unrelated offset of the log.
        raise ValueError("'%s' not found in %s" % (marker, log_file))
    i = start + len(marker)
    digits = "0123456789"
    mu = ""
    found_dot = False

    # Stop at the end of the text as well, in case the number is the very
    # last thing in the log.
    while i < len(s) and ((s[i] == '.' and not found_dot) or s[i] in digits):
        mu += s[i]
        if s[i] == '.':
            found_dot = True
        i += 1
    mu = float(mu)

    taxa = TaxonNamespace()
    tree = Tree.get_from_path(input_tree_file,schema="newick",taxon_namespace=taxa,rooting="force-rooted")
    tree.encode_bipartitions()
    n = len(list(tree.leaf_node_iter()))
    N = 2*n-2
    x0 = [10**-10]*N + [mu]

    idx = 0
    brlen_map = {}

    # Index every non-root branch of the input tree by its bipartition.
    for node in tree.postorder_node_iter():
        if node is not tree.seed_node:
            key = node.bipartition
            brlen_map[key] = (idx,node.edge_length)
            idx += 1

    tree2 = Tree.get_from_path(result_tree_file,schema="newick",taxon_namespace=taxa,rooting="force-rooted")
    tree2.encode_bipartitions()

    for node in tree2.postorder_node_iter():
        if node is not tree2.seed_node:
            key = node.bipartition
            idx,el = brlen_map[key]
            # Branches without a length are skipped explicitly; comparing
            # None with a number raises TypeError on Python 3.
            if el is None or node.edge_length is None:
                continue
            if el > 0 and node.edge_length>0:
                x0[idx] = node.edge_length/float(el)

    return x0
Exemple #18
0
 def test_branch_lengths(self):
     '''https://github.com/geronimp/graftM/issues/192'''
     taxes = TaxonomyExtractor().taxonomy_from_annotated_tree(
         Tree.get(path=os.path.join(path_to_data, 'create', 'sulfitereductase.ben.tree'), schema='newick'))
     # assertEqual replaces the assertEquals alias, which was removed in
     # Python 3.12.
     self.assertEqual([u'Aanerobic sulfite reductase asrC',
                       u'Anaerobic sulfite reductase asrC Group 3',
                       u'Unknown alpha and beta subunits',
                       u'0.856_PFAM_NIR_SIR,NIR_SIR_ferr'], # number is actually in the clade name
                      taxes['T506DRAFT_scaffold00010.10_60~2561511230'])
def index_mutations(con):
    """Builds an index of all mutations.

    For every alignment method / phylogenetic model pair the ancestral
    cladogram is parsed, and ``index_mutations_helper`` is called for each
    edge whose head and tail nodes both exist and both carry a label. The
    ancestor names passed on are "Node" followed by the node label.
    """
    for msaid in get_alignment_method_ids(con):
        for modelid in get_phylo_modelids(con):
            newick = get_anc_cladogram(con, msaid, modelid)
            t = Tree()
            t.read_from_string(newick, "newick")
            for edge in t.preorder_edge_iter():
                head = edge.head_node
                tail = edge.tail_node
                if head is None or tail is None:
                    continue
                if head.label is None or tail.label is None:
                    continue
                # A single formatted string prints identically on Python 2
                # and 3 (the bare print statement is Python-2 only).
                print("%s %s %s %s" % (msaid, modelid, head.label, tail.label))
                anc1name = "Node" + str(head.label)
                anc2name = "Node" + str(tail.label)
                index_mutations_helper(con, msaid, modelid, anc1name, anc2name)
Exemple #20
0
 def test_remove_sequences_with_named_internal_nodes(self):
     '''Removing two leaves below a named internal node gives the expected newick.'''
     cleaner = DendropyTreeCleaner()
     newick = "('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n"
     tree = Tree.get(data=newick, schema='newick')
     to_remove = ['CP006577_764~2588253768',
                  'Afulgi_764~2528311132']
     cleaner.remove_sequences(tree, to_remove)
     expected = "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,AE000782_746~638154502:7.555):1.461"
     self.assertEqual(expected, str(tree))
Exemple #21
0
def check_list_against_tree(treepath, checklist):
    '''Parse the newick tree file at treepath and pass it, together with
    checklist, to check_list_against_taxa.

    The file is read as UTF-8, underscores in labels are preserved and the
    tree is read with 'default-rooted' rooting.'''
    with open(treepath, 'r', encoding='UTF-8') as treefile:
        tree = Tree.get_from_stream(
            treefile,
            schema="newick",
            preserve_underscores=True,
            rooting='default-rooted',
        )
        check_list_against_taxa(tree, checklist)
Exemple #22
0
def test_yule(script_runner, execution_number, datadir):
    """Run tact_add_taxa with --yule on the stem2 data, then check the result.

    Both scripts must exit with return code 0. The ASCII plot of the
    generated tree is written to stderr. Returns the generated tree, the
    taxonomy tree and the backbone tree.
    """
    taxonomy = os.path.join(datadir, "stem2.taxonomy.tre")
    backbone = os.path.join(datadir, "stem2.backbone.tre")
    taxed = Tree.get(path=taxonomy, schema="newick")
    bbone = Tree.get(path=backbone, schema="newick", rooting="default-rooted")

    add_taxa = script_runner.run(
        "tact_add_taxa",
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", ".tact-pytest-yule",
        "-vv",
        "--yule",
    )
    assert add_taxa.returncode == 0

    output = ".tact-pytest-yule.newick.tre"
    tacted = Tree.get(path=output, schema="newick", rooting="default-rooted")
    sys.stderr.write(tacted.as_ascii_plot())

    check = script_runner.run(
        "tact_check_results",
        output,
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", ".tact-pytest-yule.check.csv",
        "--cores=1",
    )
    assert check.returncode == 0
    return (tacted, taxed, bbone)
Exemple #23
0
def tree_compare(tempdir):
    """Compare the reference tree in ``tempdir`` with two other trees.

    Reads "ref.tree", "normal_tree" and "red_tree" from ``tempdir`` into a
    shared taxon namespace and returns the symmetric difference of the
    reference tree to each of the other two, as
    ``(distance_normal, distance_reduced)``.
    """
    tns = dendropy.TaxonNamespace()
    loaded = []
    for suffix in ("/ref.tree", "/normal_tree", "/red_tree"):
        loaded.append(
            Tree.get_from_path(tempdir + suffix, "newick", taxon_namespace=tns)
        )
    for tree in loaded:
        tree.encode_bipartitions()
    reference, normal, reduced = loaded
    return (
        treecompare.symmetric_difference(reference, normal),
        treecompare.symmetric_difference(reference, reduced),
    )
Exemple #24
0
def scale_tree(f_name, n):
    """Write a copy of the newick tree in ``f_name`` with edges scaled by ``n``.

    Every edge that has a length gets it multiplied by ``n``. The result is
    written next to the input, under the input name with every '.mt'
    removed, followed by '_', ``n`` (with '.' replaced by '_') and '.mt'.
    """
    # Context managers close both files even if parsing or writing fails.
    with open(f_name, 'r') as src:
        t = Tree.get(file=src, schema="newick", tree_offset=0)

    for e in t.edges():
        if e.length is not None:
            e.length = float(n*float(e.length))

    out_name = f_name.replace('.mt', '') + '_' + str(n).replace('.', '_') + '.mt'
    with open(out_name, 'w+') as dst:
        t.write(file=dst, schema="newick")
def remove_branch_lengths(f, out):
    """Write the newick tree from file ``f`` to ``out`` without edge lengths."""
    # Context managers close both files even if parsing or writing fails.
    with open(f, 'r') as src:
        t = Tree.get(file=src, schema="newick")

    for e in t.edges():
        e.length = None

    with open(out, 'w+') as new:
        t.write(file=new, schema="newick")
def get_tree_and_OTT_list(tree_filehandle, sources, verbosity=0):
    """
    Takes a base tree and creates objects for each node and leaf, attaching them as 'data' dictionaries
    to each node in the DendroPy tree. Nodes and leaves with an OTT id also have pointers to their data
    dicts stored in an OTT-keyed dict, so that mappings to other databases (ncbi id, etc etc) can be created.

    We can easily have duplicate leaf names, so for the entire procedure we ignore the Dendropy concept
    of a taxon list and simply use labels.

    Parameters:
        tree_filehandle: open file containing the newick tree; its ``name`` is used in messages.
        sources: iterable of source names; each becomes a key (with value None) in the
            'sources' dict of every node that has an OTT or mrca id.
        verbosity: not used in this function.

    Returns the Dendropy tree and the OTT dict. "mrca" nodes are stored in the OTT dict
    under their 'mrcaott..ott..' string rather than under an integer id.

    Exits the program (sys.exit) if the tree cannot be read.
    """
    indexed_by_ott={}

    try:
        tree = Tree.get_from_stream(tree_filehandle, schema="newick", preserve_underscores=True, suppress_leaf_node_taxa=True)
    except Exception:
        # Report the handle that was actually passed in (the name used here
        # before was undefined and raised NameError instead of exiting).
        sys.exit("Problem reading tree from " + tree_filehandle.name)
    info("-> read tree from " + tree_filehandle.name)

    ott_node = re.compile(r"(.*) ott(\d+)(@\d*)?$") #matches the OTT number
    mrca_ott_node = re.compile(r"(.*) (mrcaott\d+ott\d+)(@\d*)?$") #matches a node with an "mrca" node number (no unique OTT)
    unsubstituted_msg = "Node has an @ sign at the end ({}), meaning it has probably not been substituted by an OpenTree equivalent. You may want to provide an alternative subtree from this node downwards, as otherwise it will probably be deleted from the main tree."
    n_nodes = 0
    for node in tree.preorder_node_iter():
        n_nodes += 1
        node.data = {'parent':node.parent_node or None}
        if not node.label:
            continue
        node.label = node.label.replace("_"," ")
        m = ott_node.search(node.label)
        if m is not None:
            if m.group(3):
                warn(unsubstituted_msg.format(node.label))
            node.label = m.group(1)
            node.data['ott'] = int(m.group(2))
            indexed_by_ott[node.data['ott']] = node.data
            node.data['sources']={k:None for k in sources}
        else:
            m = mrca_ott_node.search(node.label)
            if m is not None:
                if m.group(3):
                    warn(unsubstituted_msg.format(node.label))
                node.label = m.group(1)
                #this is an 'mrca' node, so we want to save sources but *not* save the ott number in node.data
                indexed_by_ott[m.group(2)] = node.data
                node.data['sources']={k:None for k in sources}
            elif node.is_leaf():
                warn("Leaf without an OTT id: '{}'. This will not be associated with any other data".format(node.label))
        #finally, put underscores at the start or the end of the new label back
        #as these denote "fake" names that are hidden and only used for mapping
        #we could keep them as spaces, but leading/trailing underscores are easier to see by eye
        #(startswith/endswith are safe when stripping the id left an empty label)
        if node.label.startswith(" "):
            node.label = "_" + node.label[1:]
        if node.label.endswith(" "):
            node.label = node.label[:-1] + "_"

    # n_nodes is the real number of nodes visited (the last enumerate index
    # reported before was one short).
    info("-> extracted {} otts from among {} leaves and nodes".format(len(indexed_by_ott), n_nodes))
    return tree, indexed_by_ott
Exemple #27
0
def main():
    """Print node labels of matching RAxML bipartition trees side by side.

    Takes two directories from the command line, pairs up their
    '*/RAxML_bipartitions.bipart' files in sorted order and, for each pair,
    asserts that both trees have the same ``tostr`` form and the same
    parent directory name, then prints that name with each pair of node
    labels.
    """
    d1 = sys.argv[1]
    d2 = sys.argv[2]
    print('og {} {}'.format(basename(d1), basename(d2)))

    pattern_tail = ('*', 'RAxML_bipartitions.bipart')
    d1_files = sorted(glob(join(d1, *pattern_tail)))
    d2_files = sorted(glob(join(d2, *pattern_tail)))
    assert len(d1_files) == len(d2_files)

    for fn1, fn2 in zip(d1_files, d2_files):
        t1 = Tree.get(path=fn1, schema='newick')
        t2 = Tree.get(path=fn2, schema='newick')
        assert tostr(t1) == tostr(t2)
        t1_og = basename(dirname(fn1))
        t2_og = basename(dirname(fn2))
        assert t1_og == t2_og
        for l1, l2 in zip(get_node_labels(t1), get_node_labels(t2)):
            print(t1_og, l1, l2)
Exemple #28
0
def evaluate(ref, file_name):
    """Return the symmetric difference between a reference and an inferred tree.

    ``fastprot`` is run on ``file_name`` and ``fnj`` on its output to infer
    a tree, which is then compared with the newick tree stored at ``ref``.
    Both trees are read into ``tns``, a taxon namespace defined outside
    this function.

    Raises OSError if one of the FastPhylo programs cannot be started.
    """
    import subprocess

    # To store the data during the process, we create two temporary files.
    # Only their paths are needed, so the descriptors are closed right away
    # instead of being leaked.
    fd1, dist_path = tempfile.mkstemp()
    os.close(fd1)
    fd2, tree_path = tempfile.mkstemp()
    os.close(fd2)

    try:
        # Use the commands of fastprot and fnj.
        # The output of the FastPhylo programs is in file 'tree_path'.
        # Argument lists (no shell) keep paths with spaces or shell
        # metacharacters intact.
        subprocess.call(["fastprot", "-m", "-o", dist_path, file_name])
        subprocess.call(["fnj", "-O", "newick", "-m", "FNJ", "-o", tree_path, dist_path])

        #Use Dendropy to compare the trees.
        with open(tree_path) as tree_stream:
            in_tree = Tree.get_from_stream(tree_stream,
                                           schema='newick',
                                           taxon_namespace=tns)
        ref_tree = Tree.get_from_path(ref, schema='newick', taxon_namespace=tns)
        sym_diff = treecompare.symmetric_difference(ref_tree, in_tree)
    finally:
        # Always remove the temporary files, also when a step above failed.
        for path in (dist_path, tree_path):
            try:
                os.remove(path)
            except OSError:
                pass

    return sym_diff
Exemple #29
0
    def test_ben_bug(self):
        """Re-rooting one tree by another must swap the two root clades and keep all tips.

        Both trees are first passed through ``Rerooter.reroot`` and the
        results through ``reroot_by_tree``. The tips below the first root
        child of the old tree must then sit below the second root child of
        the new tree (and vice versa), and the number of leaves must equal
        that of the tree that was re-rooted.
        """
        # Each newick string is kept on a single physical line: a quoted
        # string literal cannot contain a raw line break.
        new_tree_newick = '(646366661:0.00571,(646777089:0.01427,(2556226606:0.0,2517129521:0.0):0.04312)0.377:0.01170,((650856936:0.01153,(((((646367708:0.01465,(638201361:0.00187,646622935:0.00573)0.940:0.01352)0.988:0.02634,(2519841469:0.06952,(650856136:0.01840,2506713669:0.02486)0.774:0.00888)0.893:0.01193)0.981:0.03778,((649738338:0.07504,(638155665:0.00613,648151945:0.00304)0.995:0.05973)0.884:0.02836,((650752390:0.11644,(2516847065:0.01707,2520801411:0.03442)0.993:0.04619)0.940:0.03278,(640592705:0.14347,637846211:0.11851)0.971:0.04593)0.940:0.03067)0.943:0.03401)0.998:0.06483,(638168675:0.17080,((649738388:0.09935,((((2540854716:0.00325,2553937573:0.00406)1.000:0.09930,(646533023:0.09868,640592823:0.06908)0.951:0.03770)1.000:0.07636,(650872422:0.05527,(650750471:0.05106,(2516847513:0.01440,2520803234:0.02517)0.998:0.05067)0.947:0.03589)0.074:0.01784)0.786:0.03804,(638155700:0.00445,648151981:0.00284)0.894:0.02131)0.995:0.08448)0.999:0.10690,((KYC55281.1:0.28954,(2540666849:0.26647,(2555938320:0.04589,2518907621:0.04631)0.970:0.05624)1.000:0.12340)0.993:0.09723,(((2515321874:0.26529,((637699780:0.01317,(2540563143:0.01361,(638165755:0.01099,638179449:0.01674)0.558:0.00611)0.964:0.01965)1.000:0.10518,(2502870849:0.06989,(648055573:0.14431,(646706666:0.11338,(637960147:0.03570,(2509663319:0.04930,(2519472088:0.03452,2515107634:0.07709)0.639:0.02134)0.957:0.03004)0.316:0.01755)0.809:0.02055)0.323:0.01213)0.991:0.07286)0.974:0.06071)0.997:0.09000,(650797088:0.06590,(639699575:0.03533,2512008957:0.12951)0.779:0.03900)1.000:0.21062)0.685:0.04985,((((640867801:0.08102,(2507462304:0.07476,(643570914:0.08474,((637897753:0.11959,((2509037835:0.14386,(648194984:0.08665,(648195418:0.04239,2506476786:0.04237)0.993:0.03477)0.668:0.01876)0.510:0.00983,((((640115295:0.06428,2540643958:0.01655)1.000:0.05043,2540643737:0.02645)0.502:0.01269,640115052:0.02482)0.987:0.04272,(2507147269:0.04181,2507146024:0.06962)0.615:0.01449)0.611:0.01456)0.992:0.03807)0.542:0.02193,((2525334810:0.02116,640099739:0.01544)0.785:0.00549,(640100248:0.00446,2525335778:0.02444)0.750:0.00227)1.000:0.12583)0.944:0.03489)0.962:0.04171)0.986:0.04499)0.793:0.02738,2509039570:0.05560)1.000:0.18840,(2505968448:0.05750,(2505971857:0.03133,2512783668:0.02305)0.185:0.01848)0.998:0.08344)0.875:0.04885,2518787893:0.16350)0.868:0.04436)1.000:0.14016)0.957:0.05998)0.998:0.08120)0.984:0.05479)0.999:0.07664,(2506713165:0.01408,((650917784:0.03595,640788680:0.07226)0.510:0.02178,(2519842728:0.03972,(646859549:0.04217,(2511672461:0.01672,(640786544:0.03901,(640793336:0.00334,(640165512:0.02037,641283602:0.00189)0.147:0.00210)0.175:0.00497)0.641:0.01093)0.991:0.02914)1.000:0.05940)0.323:0.02509)0.977:0.02843)0.960:0.02127)0.499:0.01743)0.998:0.03986,(638202197:0.00190,(644970377:0.01516,646623830:0.00752)0.678:0.00364)0.903:0.00939)0.626:0.01605);'
        old_tree_newick = '(((((646366661:0.00564,((2517129521:0,2556226606:0):0.04302,646777089:0.01412)0.499:0.01173)0.999:0.07494,(638202197:0.0019,(644970377:0.01507,646623830:0.0075)0.738:0.00362)0.872:0.00931)0.635:0.01598,650856936:0.01308)0.995:0.04,(2506713165:0.01171,((640788680:0.07255,650917784:0.03571)0.466:0.02189,(2519842728:0.03945,(646859549:0.04217,(2511672461:0.01668,(640786544:0.03894,(640793336:0.00335,(640165512:0.02038,641283602:0.0019)0.155:0.00211)0.174:0.00496)0.668:0.01095)0.987:0.02908)1.000:0.05973)0.285:0.02499)0.985:0.02844)0.967:0.02183)0.668:0.01914,(((646367708:0.01473,(638201361:0.00188,646622935:0.00578)0.947:0.01381)0.981:0.02381,(2519841469:0.06809,(650856136:0.0182,2506713669:0.02508)0.727:0.0089)0.915:0.01182)0.971:0.03729,((649738338:0.07412,(648151945:0.00339,638155665:0.00581)0.999:0.05834)0.847:0.02714,((650752390:0.11368,(2516847065:0.01707,2520801411:0.03449)0.997:0.04731)0.943:0.033,(640592705:0.14071,637846211:0.11886)0.974:0.04714)0.907:0.02899)0.938:0.03341)0.999:0.06584,(638168675:0.16751,((649738388:0.09339,((((2540854716:0.00327,2553937573:0.00411)1.000:0.1001,(640592823:0.06966,646533023:0.10017)0.945:0.03786)0.999:0.07732,(650872422:0.0565,(650750471:0.05131,(2516847513:0.01447,2520803234:0.02528)0.998:0.05086)0.932:0.03598)0.014:0.01864)0.823:0.03947,(648151981:0.00285,638155700:0.0045)0.891:0.01979)0.998:0.08667)0.999:0.11839,((2540666849:0.25749,(2518907621:0.04615,2555938320:0.04332)0.979:0.0624)1.000:0.20869,(((2515321874:0.27886,((637699780:0.01253,(2540563143:0.01408,(638165755:0.01119,638179449:0.01933)0.559:0.0062)0.973:0.01906)1.000:0.10181,(((2509663319:0.05476,(2515107634:0.07727,2519472088:0.03736)0.549:0.02018)0.970:0.02691,(637960147:0.03454,646706666:0.12022)0.328:0.01393)0.935:0.02456,(2502870849:0.07124,648055573:0.13944)0.419:0.01578)0.997:0.07265)0.976:0.05902)0.992:0.09155,(650797088:0.06773,(639699575:0.03844,2512008957:0.12921)0.700:0.03684)1.000:0.19382)0.774:0.0559,((2518787893:0.1617,(2512783668:0.01562,(2505971857:0.02991,2505968448:0.06931)0.820:0.01544)1.000:0.10132)0.000:0.03917,((640867801:0.07687,(2507462304:0.07769,(((637897753:0.11989,((2509037835:0.14075,(648194984:0.08661,(648195418:0.04254,2506476786:0.04232)0.986:0.0348)0.722:0.0191)0.553:0.00998,((((640115295:0.06424,2540643958:0.01643)1.000:0.05042,2540643737:0.02653)0.542:0.01245,640115052:0.0251)0.986:0.04265,(2507146024:0.06963,2507147269:0.0417)0.641:0.01435)0.611:0.01449)0.989:0.03824)0.424:0.02187,((640099739:0.01547,2525334810:0.02122)0.833:0.00545,(640100248:0.00445,2525335778:0.02442)0.761:0.0023)1.000:0.12528)0.944:0.03961,643570914:0.0938)0.959:0.03885)0.984:0.04627)0.758:0.02972,2509039570:0.05614)1.000:0.19872)0.185:0.04422)0.999:0.13795)0.646:0.06267)0.977:0.07517)0.979:0.05715);'
        old_tree = Tree.get(schema='newick', data=old_tree_newick)
        tree_to_reroot = Tree.get(schema='newick', data=new_tree_newick)
        r = Rerooter()

        new_tree = r.reroot_by_tree(
            r.reroot(old_tree),
            r.reroot(tree_to_reroot))

        expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
        expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()
        for tip in expected_lefts:
            self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[1].leaf_nodes()])
        for tip in expected_rights:
            self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[0].leaf_nodes()])
        self.assertEqual(len(tree_to_reroot.leaf_nodes()), len(new_tree.leaf_nodes()))
Exemple #30
0
def return_trees_from_trace(path):
    """Collect trees and log-likelihoods from the trace file at ``path``.

    Each line holds a log-likelihood followed by a newick tree: everything
    before the first "(" (with square brackets removed) is read as the
    log-likelihood. A tree is appended when its log-likelihood is higher
    than that of the previous line; otherwise it replaces the most recent
    entry. After the last line, the final tree is appended once more.

    Returns ``[trees, lnls]`` where ``lnls`` holds the log-likelihoods
    formatted with two decimals. An empty file yields ``[[], []]``.

    NOTE(review): the tree is parsed from the whole line, including the
    log-likelihood prefix, and the last tree ends up in the result twice --
    both are kept as found; confirm they are intended.
    """
    print("Parsing trace: %s" % (path,))
    trees = []
    lnls = []
    last_tree = None
    last_lnl = 0.0
    count_unique_trees = 0
    # Iterating the file object works on Python 2 and 3 (xreadlines is
    # Python-2 only); the context manager closes the file on errors too.
    with open(path, "r") as fin:
        for line in fin:
            lnlstring = line.partition("(")[0].replace("[", "").replace("]", "")
            lnl = float(lnlstring)
            t = Tree()
            t.read_from_string(line, "newick")
            if last_tree is None or last_lnl < lnl:
                # First tree, or an improvement over the previous line.
                trees.append(t)
                lnls.append("%.2f" % lnl)
                count_unique_trees += 1
            else:
                trees[-1] = t
                lnls[-1] = "%.2f" % lnl
            last_tree = t
            last_lnl = lnl
            print("%s %s" % (count_unique_trees, lnl))
    # Guard against an empty trace, where there is no last tree to append.
    if last_tree is not None:
        trees.append(last_tree)
        lnls.append("%.2f" % last_lnl)
    return [trees, lnls]
		def __init__(self,ddpTree=None,tree_file=None,schema="newick",Tree_records=None):
			"""Set up the tree and the initial optimisation state.

			ddpTree: tree object used as-is (not copied) when no tree_file is given.
			tree_file: path of a tree file to read with the given schema; takes
				precedence over ddpTree.
			schema: schema name used to read tree_file.
			Tree_records: list stored on the instance; a new empty list is
				created when omitted.
			"""
			if tree_file:
				self.ddpTree = Tree.get_from_path(tree_file,schema)
			else:
				#self.ddpTree = copy.deepcopy(ddpTree)
				self.ddpTree = ddpTree
			# A fresh list per instance: a mutable default argument would be
			# shared by every instance created without Tree_records.
			self.Tree_records = [] if Tree_records is None else Tree_records
			self.opt_score = None
			self.opt_root = self.ddpTree.seed_node
			self.opt_x = 0
Exemple #32
0
    def assert_tree_equal_no_labels_deprecated(self, expected_newick, observed_tree):
        """Assert that two trees have the same string form once internal labels are dropped.

        ``expected_newick`` is parsed into a tree; the labels of all
        non-leaf nodes are then cleared on both trees (``observed_tree`` is
        modified in place) before their ``str()`` forms are compared.
        """
        expected = Tree.get(schema='newick', data=expected_newick)

        for tree in (expected, observed_tree):
            for node in tree.nodes():
                if node.is_leaf():
                    continue
                node.label = None
        self.assertEqual(str(expected), str(observed_tree))
Exemple #33
0
 def test_ben_bug(self):
     """Reroot a tree after a reference tree and check the root bipartition.

     Both trees are first passed through ``Rerooter.reroot`` and then through
     ``Rerooter.reroot_by_tree``. Every tip under the reference root's first
     child must appear under the result root's second child (and vice versa),
     and the number of leaves must be unchanged.
     """
     # Each newick literal is written as two adjacent string pieces: a single
     # quoted literal cannot span physical lines.
     new_tree_newick = (
         u'(646366661:0.00571,(646777089:0.01427,(2556226606:0.0,2517129521:0.0):0.04312)0.377:0.01170,((650856936:0.01153,(((((646367708:0.01465,(638201361:0.00187,646622935:0.00573)0.940:0.01352)0.988:0.02634,(2519841469:0.06952,(650856136:0.01840,2506713669:0.02486)0.774:0.00888)0.893:0.01193)0.981:0.03778,((649738338:0.07504,(638155665:0.00613,648151945:0.00304)0.995:0.05973)0.884:0.02836,((650752390:0.11644,(2516847065:0.01707,2520801411:0.03442)0.993:0.04619)0.940:0.03278,(640592705:0.14347,637846211:0.11851)0.971:0.04593)0.940:0.03067)0.943:0.03401)0.998:0.06483,(638168675:0.17080,((649738388:0.09935,((((2540854716:0.00325,2553937573:0.00406)1.000:0.09930,(646533023:0.09868,640592823:0.06908)0.951:0.03770)1.000:0.07636,(650872422:0.05527,(650750471:0.05106,(2516847513:0.01440,2520803234:0.02517)0.998:0.05067)0.947:0.03589)0.074:0.01784)0.786:0.03804,(638155700:0.00445,648151981:0.00284)0.894:0.02131)0.995:0.08448)0.999:0.10690,((KYC55281.1:0.28954,(2540666849:0.26647,(2555938320:0.04589,2518907621:0.04631)0.970:0.05624)1.000:0.12340)0.993:0.09723,(((2515321874:0.26529,((637699780:0.01317,(2540563143:0.01361,(638165755:0.01099,638179449:0.01674)0.558:0.00611)0.964:0.01965)1.000:0.10518,(2502870849:0.06989,(648055573:0.14431,(646706666:0.11338,(637960147:0.03570,(2509663319:0.04930,(2519472088:0.03452,2515107634:0.07709)0.639:0.02134)0.957:0.03004)0.316:0.01755)0.809:0.02055)0.323:0.01213)0.991:0.07286)0.974:0.06071)0.997:0.09000,(650797088:0.06590,(639699575:0.03533,2512008957:0.12951)0.779:0.03900)1.000:0.21062)0.685:0.04985,((((640867801:0.08102,(2507462304:0.07476,(643570914:0.08474,((637897753:0.11959,((2509037835:0.14386,(648194984:0.08665,(648195418:0.04239,2506476786:0.04237)0.993:0.03477)0.668:0.01876)0.510:0.00983,((((640115295:0.06428,2540643958:0.01655)1.000:0.05043,2540643737:0.02645)0.502:0.01269,640115052:0.02482)0.987:0.04272,(2507147269:0.04181,2507146024:0.06962)0.615:0.01449)0.611:0.01456)0.992:0.03807)0.542:0.02193,((2525334810'
         u':0.02116,640099739:0.01544)0.785:0.00549,(640100248:0.00446,2525335778:0.02444)0.750:0.00227)1.000:0.12583)0.944:0.03489)0.962:0.04171)0.986:0.04499)0.793:0.02738,2509039570:0.05560)1.000:0.18840,(2505968448:0.05750,(2505971857:0.03133,2512783668:0.02305)0.185:0.01848)0.998:0.08344)0.875:0.04885,2518787893:0.16350)0.868:0.04436)1.000:0.14016)0.957:0.05998)0.998:0.08120)0.984:0.05479)0.999:0.07664,(2506713165:0.01408,((650917784:0.03595,640788680:0.07226)0.510:0.02178,(2519842728:0.03972,(646859549:0.04217,(2511672461:0.01672,(640786544:0.03901,(640793336:0.00334,(640165512:0.02037,641283602:0.00189)0.147:0.00210)0.175:0.00497)0.641:0.01093)0.991:0.02914)1.000:0.05940)0.323:0.02509)0.977:0.02843)0.960:0.02127)0.499:0.01743)0.998:0.03986,(638202197:0.00190,(644970377:0.01516,646623830:0.00752)0.678:0.00364)0.903:0.00939)0.626:0.01605);'
     )
     old_tree_newick = (
         u'(((((646366661:0.00564,((2517129521:0,2556226606:0):0.04302,646777089:0.01412)0.499:0.01173)0.999:0.07494,(638202197:0.0019,(644970377:0.01507,646623830:0.0075)0.738:0.00362)0.872:0.00931)0.635:0.01598,650856936:0.01308)0.995:0.04,(2506713165:0.01171,((640788680:0.07255,650917784:0.03571)0.466:0.02189,(2519842728:0.03945,(646859549:0.04217,(2511672461:0.01668,(640786544:0.03894,(640793336:0.00335,(640165512:0.02038,641283602:0.0019)0.155:0.00211)0.174:0.00496)0.668:0.01095)0.987:0.02908)1.000:0.05973)0.285:0.02499)0.985:0.02844)0.967:0.02183)0.668:0.01914,(((646367708:0.01473,(638201361:0.00188,646622935:0.00578)0.947:0.01381)0.981:0.02381,(2519841469:0.06809,(650856136:0.0182,2506713669:0.02508)0.727:0.0089)0.915:0.01182)0.971:0.03729,((649738338:0.07412,(648151945:0.00339,638155665:0.00581)0.999:0.05834)0.847:0.02714,((650752390:0.11368,(2516847065:0.01707,2520801411:0.03449)0.997:0.04731)0.943:0.033,(640592705:0.14071,637846211:0.11886)0.974:0.04714)0.907:0.02899)0.938:0.03341)0.999:0.06584,(638168675:0.16751,((649738388:0.09339,((((2540854716:0.00327,2553937573:0.00411)1.000:0.1001,(640592823:0.06966,646533023:0.10017)0.945:0.03786)0.999:0.07732,(650872422:0.0565,(650750471:0.05131,(2516847513:0.01447,2520803234:0.02528)0.998:0.05086)0.932:0.03598)0.014:0.01864)0.823:0.03947,(648151981:0.00285,638155700:0.0045)0.891:0.01979)0.998:0.08667)0.999:0.11839,((2540666849:0.25749,(2518907621:0.04615,2555938320:0.04332)0.979:0.0624)1.000:0.20869,(((2515321874:0.27886,((637699780:0.01253,(2540563143:0.01408,(638165755:0.01119,638179449:0.01933)0.559:0.0062)0.973:0.01906)1.000:0.10181,(((2509663319:0.05476,(2515107634:0.07727,2519472088:0.03736)0.549:0.02018)0.970:0.02691,(637960147:0.03454,646706666:0.12022)0.328:0.01393)0.935:0.02456,(2502870849:0.07124,648055573:0.13944)0.419:0.01578)0.997:0.07265)0.976:0.05902)0.992:0.09155,(650797088:0.06773,(639699575:0.03844,2512008957:0.12921)0.700:0.03684)1.000:0.19382)0.774:0.0559,((2518787893:0.1617,(251'
         u'2783668:0.01562,(2505971857:0.02991,2505968448:0.06931)0.820:0.01544)1.000:0.10132)0.000:0.03917,((640867801:0.07687,(2507462304:0.07769,(((637897753:0.11989,((2509037835:0.14075,(648194984:0.08661,(648195418:0.04254,2506476786:0.04232)0.986:0.0348)0.722:0.0191)0.553:0.00998,((((640115295:0.06424,2540643958:0.01643)1.000:0.05042,2540643737:0.02653)0.542:0.01245,640115052:0.0251)0.986:0.04265,(2507146024:0.06963,2507147269:0.0417)0.641:0.01435)0.611:0.01449)0.989:0.03824)0.424:0.02187,((640099739:0.01547,2525334810:0.02122)0.833:0.00545,(640100248:0.00445,2525335778:0.02442)0.761:0.0023)1.000:0.12528)0.944:0.03961,643570914:0.0938)0.959:0.03885)0.984:0.04627)0.758:0.02972,2509039570:0.05614)1.000:0.19872)0.185:0.04422)0.999:0.13795)0.646:0.06267)0.977:0.07517)0.979:0.05715);'
     )
     old_tree = Tree.get(schema='newick', data=old_tree_newick)
     tree_to_reroot = Tree.get(schema='newick', data=new_tree_newick)
     r = Rerooter()

     new_tree = r.reroot_by_tree(
         r.reroot(old_tree),
         r.reroot(tree_to_reroot))

     expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
     expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()
     # Collect the tip labels on each side of the new root once, rather than
     # rebuilding the list for every tip that is checked.
     new_children = new_tree.seed_node.child_nodes()
     new_left_labels = set(t.taxon.label for t in new_children[0].leaf_nodes())
     new_right_labels = set(t.taxon.label for t in new_children[1].leaf_nodes())
     # The sides are expected to be mirrored between the two trees.
     for tip in expected_lefts:
         self.assertIn(tip.taxon.label, new_right_labels)
     for tip in expected_rights:
         self.assertIn(tip.taxon.label, new_left_labels)
     self.assertEqual(len(tree_to_reroot.leaf_nodes()), len(new_tree.leaf_nodes()))
def run_tact(script_runner, datadir, stem):
    """Run ``tact_add_taxa`` and ``tact_check_results`` for one dataset.

    The input files are ``<datadir>/<stem>.backbone.tre`` and
    ``<datadir>/<stem>.taxonomy.tre``; outputs are written under the
    ``.tact-pytest-<stem>`` prefix. Both commands must exit with status 0.
    An ASCII plot of the generated tree is written to stderr.

    Returns the tuple ``(tacted, taxed, bbone)``: the generated tree, the
    taxonomy tree and the backbone tree.
    """
    output_stem = ".tact-pytest-" + stem
    taxonomy = os.path.join(datadir, stem + ".taxonomy.tre")
    backbone = os.path.join(datadir, stem + ".backbone.tre")
    taxed = Tree.get(path=taxonomy, schema="newick")
    bbone = Tree.get(path=backbone, schema="newick")

    add_taxa = script_runner.run(
        "tact_add_taxa",
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", output_stem,
        "-vv")
    assert add_taxa.returncode == 0

    output = output_stem + ".newick.tre"
    tacted = Tree.get(path=output, schema="newick")
    sys.stderr.write(tacted.as_ascii_plot())

    check = script_runner.run(
        "tact_check_results", output,
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", output_stem + ".check.csv",
        "--cores=1")
    assert check.returncode == 0
    return (tacted, taxed, bbone)
Exemple #35
0
    def assert_tree_equal_no_labels_deprecated(self, expected_newick, observed_tree):
        """Compare two trees by string form, ignoring internal node labels.

        The expected tree is built from ``expected_newick`` via ``Tree.get``.
        Labels of all non-leaf nodes are reset to ``None`` on both trees
        (mutating ``observed_tree``) before ``str()`` of each is compared.
        """
        reference = Tree.get(schema='newick', data=expected_newick)

        for candidate in (reference, observed_tree):
            internal = [n for n in candidate.nodes() if not n.is_leaf()]
            for n in internal:
                n.label = None
        self.assertEqual(str(reference), str(observed_tree))
Exemple #36
0
def pretty_print_trees():
    """Rewrite each species' RAxML result tree with annotated taxon labels.

    For every species in ``species_trna_seq`` (sorted) whose file
    ``RAXMLDIR/RAxML_result.<species>`` exists, the tree is read and every
    taxon label in its string form is replaced by::

        <label><dups>[<count>]<mark>

    * ``<dups>`` is ``(N)`` when the label has entries in
      ``species_trna_dups`` (N = number of entries + 1), otherwise empty;
    * ``<count>`` is ``count_trna_types(species)`` looked up by the label's
      entry in ``species_trna_mtrip``;
    * ``<mark>`` is ``***`` when the label is listed in
      ``species_switchedtrnas`` for that species.

    The result is written to ``TREEDIR/<species>.tree``.
    """
    print("\n. OK, I'm reformatting the RAxML results for nice printing...")
    for species in sorted(species_trna_seq.keys()):
        treepath = RAXMLDIR + "/RAxML_result." + species
        if not os.path.exists(treepath):
            continue
        newtreepath = TREEDIR + "/" + species + ".tree"
        t = Tree()
        t.read_from_path(treepath, "newick")
        print(" --> " + treepath)
        trna_count = count_trna_types(species)
        newts = str(t)
        for taxon in t.taxon_set:
            thisac = species_trna_mtrip[species][taxon.label]
            count_this_type = trna_count[thisac]

            count_dups = 0
            if taxon.label in species_trna_dups[species]:
                count_dups = len(species_trna_dups[species][taxon.label]) + 1
            dups_tag = "(" + str(count_dups) + ")" if count_dups > 1 else ""

            mark = ""
            if species in species_switchedtrnas:
                print("534: %s" % (species_switchedtrnas[species],))
                if taxon.label in species_switchedtrnas[species]:
                    mark = "***"

            # Literal replacement: the label is data, not a regex pattern, so
            # characters such as "." or "(" in it must not be interpreted.
            # NOTE(review): a label that is a prefix of another label (e.g.
            # "trna1" / "trna12") is still replaced inside the longer one.
            newts = newts.replace(
                taxon.label,
                taxon.label + dups_tag + "[" + str(count_this_type) + "]" + mark)
        # The context manager closes the file even if the write fails.
        with open(newtreepath, "w") as fout:
            fout.write(newts + "\n")
Exemple #37
0
def write_and_read_nexus(filename, header, tree_id, tree_str):
    """Write a nexus file holding one tree and read it back as a tree object.

    Each entry of ``header`` (a list of strings) is written on its own line,
    followed by the line ``tree <tree_id> <tree_str>``. The file is then
    loaded with ``Tree.get`` using the "nexus" schema and a case-sensitive
    taxon namespace, and the resulting tree is returned.
    """
    namespace = TaxonNamespace(is_case_sensitive=True)
    tree_line = "tree " + tree_id + " " + tree_str
    # write a temp file containing the tree
    with open(filename, "w") as handle:
        handle.writelines(entry + "\n" for entry in header + [tree_line])
    # read the tree back
    return Tree.get(
        path=filename,
        schema="nexus",
        taxon_namespace=namespace,
        case_sensitive_taxon_labels=True,
        suppress_internal_node_taxa=False)
Exemple #38
0
def main():
    """Load the newick tree named by the first command-line argument and
    print every item produced by ``resolve_tree`` for it, one per line."""
    from sys import argv

    tree = Tree.get_from_path(argv[1], "newick")
    resolutions = resolve_tree(tree)
    for resolved in resolutions:
        print(resolved)
Exemple #39
0
def main(OT_filehandle, OTTs_to_keep, outfile):
    """Prune a newick tree down to the wanted nodes and their ancestors.

    The tree is read from ``OT_filehandle`` and walked in post-order. A node
    is kept when ``node_label_in(node, OTTs_to_keep)`` is true or when one of
    its descendants was kept (recorded by setting a ``keep`` attribute on the
    parent). Every other node is detached from its parent without collapsing
    the resulting unifurcations. The pruned tree is written to ``outfile``.
    """
    # read in tree, but don't create taxa (faster)
    tree = Tree.get(stream=OT_filehandle, schema="newick", suppress_leaf_node_taxa=True)
    for node in tree.postorder_node_iter():
        parent = node.parent_node
        if hasattr(node, 'keep') or node_label_in(node, OTTs_to_keep):
            if parent is not None:  # this is not the root
                parent.keep = True
        elif parent is not None:
            # The root has no parent to detach it from; without this guard an
            # input in which nothing is kept would fail on ``None.remove_child``.
            parent.remove_child(node, suppress_unifurcations=False)
    tree.write(file=outfile, schema='newick', suppress_leaf_node_labels=False)
Exemple #40
0
def compute_tree_distances(con):
    """Compute and store all pairwise distances between the stored trees.

    Registers the two metrics (1 = 'symmetric', 2 = 'euclidean') in
    ``TreeDistanceMetrics``, parses the ``newick`` column of every row of
    ``UnsupportedMlPhylogenies`` into a tree, and inserts one row per metric
    and ordered pair of tree ids (including a tree with itself) into
    ``TreeDistances``.

    ``con`` is a DB-API connection; the ``?`` placeholders below assume the
    qmark parameter style (the ``insert or replace`` statements suggest
    SQLite -- confirm against the caller).
    """
    cur = con.cursor()
    cur.execute("insert or replace into TreeDistanceMetrics(metricid, name) values(1, 'symmetric')")
    cur.execute("insert or replace into TreeDistanceMetrics(metricid, name) values(2, 'euclidean')")
    con.commit()

    treeid_dendropytree = {}
    # The select must run on the same cursor the rows are fetched from.
    cur.execute("select id, almethod, phylomodelid, newick from UnsupportedMlPhylogenies")
    for row in cur.fetchall():
        treeid = row[0]
        newick = row[3]
        t = Tree()
        t.read_from_string(newick, "newick")
        treeid_dendropytree[treeid] = t

    # Values are bound as parameters rather than concatenated into the SQL.
    insert_sql = "insert into TreeDistances(metricid, treeida, treeidb, distance) values(?, ?, ?, ?)"
    for ii, treeii in treeid_dendropytree.items():
        for jj, treejj in treeid_dendropytree.items():
            # Symmetric distance
            distance = treeii.symmetric_difference(treejj)
            cur.execute(insert_sql, (1, ii, jj, distance))

            # Euclidean distance
            distance = treeii.euclidean_distance(treejj)
            cur.execute(insert_sql, (2, ii, jj, distance))

    con.commit()
                        
                        
            
    
def get_bipart(ts, species):
    """Encode the bipartition induced by the node labelled ``#1``.

    ``ts`` is a newick string. For each name in ``species`` (in order) the
    encoding holds ``'1'`` if the name is a leaf below the ``#1`` node,
    ``'0'`` if it is a leaf elsewhere in the tree and ``'?'`` if it is not in
    the tree. The encoding and its ``neg``-mapped counterpart are compared
    and the smaller of the two strings is returned.
    """
    tree = Tree.get(data=ts, schema='newick')
    marked = tree.find_node(lambda n: n.label == '#1')
    in_clade = set(leaf.taxon.label for leaf in marked.leaf_iter())
    in_tree = set(leaf.taxon.label for leaf in tree.leaf_node_iter())

    bits = []
    for name in species:
        if name in in_clade:
            bits.append('1')
        elif name in in_tree:
            bits.append('0')
        else:
            bits.append('?')
    b1 = ''.join(bits)
    b2 = ''.join(neg(bit) for bit in b1)

    assert b1 != b2
    assert len(b1) == len(b2) and len(b1) == len(species)
    return min(b1, b2)
Exemple #42
0
    def assert_tree_equal_no_labels(self, expected_newick, observed_tree):
        '''Assert that the sorted string forms of two trees match once the
        labels of their internal nodes are cleared.

        The expected tree is parsed from ``expected_newick`` as force-rooted
        newick; ``observed_tree`` is modified in place.
        '''
        expected = Tree.get(data=expected_newick, schema='newick', rooting='force-rooted')

        def clear_internal_labels(tree):
            for node in tree.internal_nodes():
                node.label = None
            # NOTE(review): this check runs once, after the loop, so only the
            # last internal node visited gets a missing edge length replaced by
            # 0.0 -- confirm whether every node was intended.
            if node.edge.length is None:
                node.edge.length = 0.0
            self.sort_tree(tree)

        for tree in (expected, observed_tree):
            clear_internal_labels(tree)

        sorted_expected = str(self.sort_tree(expected))
        sorted_observed = str(self.sort_tree(observed_tree))
        self.assertEqual(sorted_expected, sorted_observed)
Exemple #43
0
def root_tree(f_name, out):
    """Midpoint-root the newick tree in ``f_name`` and write it to ``out``.

    The tree is read as force-rooted, rerooted at its midpoint and written as
    newick without the rooting token, with real values formatted as "12.8f".
    """
    t = Tree.get(path=f_name, schema="newick", rooting='force-rooted')

    t.reroot_at_midpoint()

    # ``write(path=...)`` opens and closes the destination itself; a second,
    # unused handle on the same path is not needed (and would stay open if
    # the write raised).
    t.write(path=out,
            schema="newick",
            suppress_rooting=True,
            real_value_format_specifier="12.8f")
Exemple #44
0
    def assert_tree_equal_no_labels(self, expected_newick, observed_tree):
        '''Compare two trees by sorted string form, ignoring internal labels.

        ``expected_newick`` is parsed as a force-rooted newick tree. Internal
        node labels are set to ``None`` on both trees (``observed_tree`` is
        mutated) before the comparison.
        '''
        expected = Tree.get(data=expected_newick, schema='newick', rooting='force-rooted')

        def drop_labels(tree):
            for inner in tree.internal_nodes():
                inner.label = None
            # NOTE(review): executed after the loop, i.e. only for the last
            # internal node iterated -- confirm that this is intended.
            if inner.edge.length is None:
                inner.edge.length = 0.0
            self.sort_tree(tree)

        drop_labels(expected)
        drop_labels(observed_tree)

        expected_text = str(self.sort_tree(expected))
        observed_text = str(self.sort_tree(observed_tree))
        self.assertEqual(expected_text, observed_text)
Exemple #45
0
    def test_write_fasttree_newick(self):
        """Check the cleaned newick output for a series of input trees.

        Each case pairs an input newick string with the exact text expected
        from ``self.clean`` using a ``DendropyTreeCleaner``.
        """
        tc = DendropyTreeCleaner()
        cases = [
            ("((a,b),(d,e))root;",
             "((a,b),(d,e));\n"),
            # Internal labels should be removed.
            ("((a_2,b)c,(d,e)f)root;",
             "((a_2,b),(d,e));\n"),
            # Quoted spaces should become underscores.
            ("(('a 2',b),(d,e))root;",
             "((a_2,b),(d,e));\n"),
            # Test underscores that are quoted.
            ("(('a_2',b),(d,e))root;",
             "((a_2,b),(d,e));\n"),
            # Test dashes
            ("((ANME-2dV10_01644,b),(d,e))root;",
             "((ANME-2dV10_01644,b),(d,e));\n"),
            # A more real world example with '~' characters (which never mattered actually).
            ("('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n",
             "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,((Afulgi_764~2528311132:0.0,CP006577_764~2588253768:0.0):0.0,AE000782_746~638154502:0.0):7.555):1.461;\n"),
        ]
        for newick, cleaned in cases:
            tree = Tree.get(data=newick, schema='newick')
            self.assertEqual(cleaned, self.clean(tc, tree))
Exemple #46
0
    def test_reroot_trifurcated_tree_at_longest_child(self):
        """Reroot three trifurcating trees and compare with the expected trees.

        Each case pairs an input newick string with the newick string of the
        tree expected from ``Rerooter().reroot``; the comparison is made on
        the ``str()`` forms.
        """
        cases = [
            (u'(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);',
             u"((C:0.3,D:0.4):0.25,(A:0.1,B:0.2):0.25);"),
            (u'(A:0.5,B:0.2,(C:0.3,D:0.4):0.1);',
             u"(A:0.25,(B:0.2,(C:0.3,D:0.4):0.1):0.25);"),
            (u'(A:0.2,B:0.5,(C:0.3,D:0.4):0.1);',
             u"(B:0.25,(A:0.2,(C:0.3,D:0.4):0.1):0.25);"),
        ]
        test_trees = [Tree.get(schema='newick', data=source)
                      for source, _ in cases]
        expected_texts = [str(Tree.get(schema='newick', data=wanted))
                          for _, wanted in cases]
        rerooted_texts = [str(Rerooter().reroot(tree)).strip()
                          for tree in test_trees]

        for rerooted, expected in zip(rerooted_texts, expected_texts):
            self.assertEqual(rerooted, expected)
Exemple #47
0
def runProgram(referenceTreeFile,
               sampleTreeList,
               bootstrap_cutoff_value=80,
               output_tree="output_tree.tre",
               verbose=False,
               quiet=False,
               timing=False):
    """Label a reference tree with quartet support computed from sample trees.

    Steps: read the reference tree (newick, underscores preserved), read the
    sample trees with ``readTrees`` into the reference tree's taxon
    namespace, check that every sample tree's taxon labels are present in the
    reference namespace, build the quartet support dictionary with
    ``buildFullSupport`` and pass it to ``buildLabeledTree``.

    Parameters
    ----------
    referenceTreeFile: path of the reference tree file.
    sampleTreeList: forwarded to ``readTrees``.
    bootstrap_cutoff_value: forwarded to ``buildFullSupport``.
    output_tree: forwarded to ``buildLabeledTree``.
    verbose: print the inputs and the support dictionary. Turned off after
        the initial input printout when ``timing`` is set.
    quiet, timing: forwarded to the helper functions.

    The process exits via ``sys.exit()`` when the reference tree cannot be
    read; the function returns early (``None``) when a sample tree uses taxa
    missing from the reference namespace.
    """
    if verbose:
        print("Reference Tree: ", referenceTreeFile)
        print("Sample Tree List: ", sampleTreeList)
        print("Bootstrap Cutoff Value: ", bootstrap_cutoff_value)
        print("Output Tree File: ", output_tree)

    if timing:
        verbose = False

    try:
        reference_tree = Tree.get(path=referenceTreeFile,
                                  schema="newick",
                                  preserve_underscores=True)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt are not swallowed and reported as a format error.
        print(
            "Error with file '{}': please only use files with newick tree format"
            .format(referenceTreeFile))
        sys.exit()

    reference_tree_namespace = reference_tree.taxon_namespace

    sample_tree_list = readTrees(sampleTreeList, reference_tree_namespace,
                                 quiet)

    # Check if gene tree taxon namespace matches reference tree
    for s in sample_tree_list:
        if not reference_tree_namespace.has_taxa_labels(
                s.taxon_namespace.labels()):
            print(
                'Error: reference tree is of a different taxon namespace as the sample trees'
            )
            return

    full_quartet_dictionary = buildFullSupport(sample_tree_list,
                                               bootstrap_cutoff_value, verbose,
                                               quiet, timing)
    if verbose:
        print("Full quartet dictionary with support values")
        # A plain loop: the printing is a side effect, no list is needed.
        for quartet in full_quartet_dictionary:
            print(quartet, full_quartet_dictionary[quartet])
        print()
    buildLabeledTree(referenceTreeFile, full_quartet_dictionary, output_tree,
                     quiet, timing)
def recom_resultFig_dm(recom_prob, mixtureProb):
    """Build a site-by-node 0/1 recombination matrix and plot one row per node.

    ``recom_prob`` is indexed by column name ('recom_nodes', 'target_node',
    'posterior') and then by row position -- presumably a pandas DataFrame,
    confirm against the caller. ``mixtureProb`` is the threshold applied to
    the second component (index 1) of each per-site posterior entry.

    * For a row whose recombinant node is a tip (index < ``tips_num``), every
      site whose posterior reaches the threshold is marked for that node.
    * For any other row, each later row with swapped recombinant and target
      nodes is looked up; sites where both rows reach the threshold are
      marked for the row's target node.

    The matrix (``alignment_len`` x ``nodes_number``) is plotted with one
    subplot per node and saved as "PhyloHMM_Recombination_two.jpeg". The
    matrix is returned.
    """
    output = np.zeros((alignment_len, nodes_number))
    n_rows = len(recom_prob)
    for row in range(n_rows):
        if recom_prob['recom_nodes'][row] < tips_num:
            for site in range(alignment_len):
                if recom_prob['posterior'][row][site][1] >= mixtureProb:
                    output[site, recom_prob['recom_nodes'][row]] = 1
            continue

        for other in range(row + 1, n_rows):
            swapped = (recom_prob['recom_nodes'][row]
                       == recom_prob['target_node'][other]) and (
                           recom_prob['recom_nodes'][other]
                           == recom_prob['target_node'][row])
            if not swapped:
                continue
            for site in range(alignment_len):
                if ((recom_prob['posterior'][row][site][1] >= mixtureProb)
                        and
                    (recom_prob['posterior'][other][site][1] >= mixtureProb)):
                    output[site, recom_prob['target_node'][row]] = 1

    fig = plt.figure(figsize=(tips_num + 9, tips_num / 2))
    palette = ['red', 'green', 'purple', 'blue', 'black']
    clonaltree = Tree.get_from_path(tree_path, 'newick')
    set_index(clonaltree, alignment)
    for node_idx in range(nodes_number):
        ax = fig.add_subplot(nodes_number, 1, node_idx + 1)
        if node_idx >= tips_num:
            # Internal node: the legend lists what give_descendents returns.
            desc = set()
            members = give_descendents(clonaltree, node_idx, desc)
            legend_text = str(node_idx) + ' is mrca:' + str(members)
        else:
            legend_text = give_taxon(clonaltree, node_idx)
        ax.plot(output[:, node_idx],
                label=legend_text,
                color=palette[node_idx % 5])
        ax.legend(bbox_to_anchor=(0.045, 1.5), prop={'size': 10})
        ax.set_frame_on(False)
        ax.axis('off')

    # Only the last subplot gets its axes switched back on.
    ax.axis('on')
    ax.set_yticklabels([])
    plt.savefig("PhyloHMM_Recombination_two.jpeg")

    return output
Exemple #49
0
    def test_joel_bug(self):
        tree67 = u'''[
Thu Sep 10 15:55:28 2015: Loaded from /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.tree
Thu Sep 10 15:56:18 2015: tree_67_otus saved to /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.rerooted.tree
]
((((1928988:0.10866,2909029:0.15809):0.03546,((801940:0.10703,(3825327:0.12686,4298210:0.09398):0.07480):0.02560,729293:0.21465):0.01982):0.02058,((426860:0.16275,219508:0.12556):0.02403,((1128285:0.06200,4455990:0.07954):0.07525,(815912:0.12348,(3770699:0.23707,823009:0.09955):0.04225):0.01489):0.01849):0.01531):0.09184,(((2361381:0.22741,(3779572:0.06720,4363260:0.07438):0.01460):0.04187,(((((((734152:0.13251,4091454:0.12251):0.03552,((576962:0.14097,(1145804:0.14124,3106714:0.14895):0.01964):0.01668,(2014493:0.15560,(3192744:0.11018,(202294:0.07263,1138804:0.08032):0.05015):0.01277):0.01187):0.01016):0.01486,4323734:0.15004):0.00053,(759363:0.05430,4459468:0.04835):0.03216):0.01531,4322265:0.12041):0.01024,(4391683:0.11058,(229854:0.07735,(4336814:0.09937,((150571:0.07911,2730777:0.10930):0.04404,((4042859:0.25381,(717487:0.13914,4363563:0.19585):0.02281):0.02587,(((3190878:0.16480,4452949:0.07312):0.05029,(4015030:0.10339,(4438491:0.04779,(2286116:0.08699,(4251079:0.03657,4349225:0.02256):0.01189):0.01091):0.04963):0.01748):0.02917,(3014179:0.16455,(2170497:0.16101,(2107103:0.22406,951205:0.11633):0.02436):0.02574):0.03041):0.01561):0.02862):0.02589):0.01914):0.01811):0.01347):0.01451,((182569:0.14758,4363259:0.07793):0.04894,696036:0.14901):0.01514):0.01624):0.02659,(3761685:0.11278,4423155:0.16503):0.03965):0.09184);
'''
        tree70 = u'((4423550:0.17275,((4091454:0.108,4427993:0.1045)50:0.01575,((123662:0.06599,(3269889:0.12737,(104534:0.06041,734152:0.09136)20:0.00526)80:0.01669)90:0.01398,(300695:0.10755,225636:0.1317)100:0.0405)0:0.01073)40:0.0128)20:0.00782,(4377103:0.09243,((172946:0.08097,1145804:0.08645)100:0.02986,(1941303:0.0953,4332975:0.09505)90:0.00838)100:0.02206)90:0.0272,((((1931714:0.07012,(4322265:0.10071,4343117:0.13235)100:0.01842)100:0.03116,(((759363:0.05402,4459468:0.0433)100:0.02405,(294612:0.14484,2679839:0.1009)90:0.02132)70:0.01331,((((((730039:0.15444,((4015030:0.11176,(4438491:0.04568,(4349225:0.02406,(2286116:0.08501,(4251079:0.02026,4386156:0.01582)80:0.01016)40:0.0097)80:0.0168)100:0.03826)50:0.01397,(4308961:0.10766,4452949:0.05355)90:0.06215)40:0.01455)50:0.01325,(((1718272:0.12738,(150571:0.08502,(699249:0.03117,2730777:0.03253)100:0.06302)70:0.02174)60:0.03847,(((2107103:0.20025,3190878:0.14435)40:0.03601,(1824285:0.10892,3014179:0.14706)30:0.02039)0:0.01309,((3366304:0.09202,951205:0.07509)100:0.05732,2170497:0.16332)90:0.02722)10:0.01937)0:0.01868,(3064426:0.20791,((1837676:0.14477,(4363563:0.14803,4479774:0.10823)90:0.04638)90:0.03766,(4042859:0.2295,717487:0.15674)40:0.01749)20:0.01416)0:0.01063)0:0.03387)100:0.04795,4336814:0.08037)0:0.02958,(346735:0.11193,4391683:0.07639)60:0.00894)0:0.01312,1142178:0.07594)0:0.01881,(229854:0.0646,4460175:0.09289)90:0.02422)20:0.01731)0:0.01339)0:0.00777,(((2984017:0.05634,4340384:0.07722)80:0.03016,(((4371218:0.13005,(1133483:0.08797,3106714:0.09717)90:0.02053)80:0.02174,(3256066:0.08328,4022282:0.11841)90:0.03619)100:0.03392,((202294:0.06795,1138804:0.07777)100:0.05296,(3192744:0.09608,(2014493:0.11684,(180127:0.06532,4417185:0.0713)100:0.03824)100:0.0368)40:0.01663)70:0.00787)50:0.01733)10:0.0083,(222095:0.1391,(288404:0.13004,(4323734:0.07601,4446882:0.06844)60:0.01661)100:0.02863)40:0.01639)0:0.00846)0:0.0135,(((((1133369:0.07769,4336154:0.07979)100:0.11778,(((708774:0.0822,((114724:0.047,82092:0.0
4936)100:0.11526,(201206:0.10329,4423155:0.14181)60:0.03138)40:0.01886)80:0.03209,(202302:0.11673,3761685:0.09059)100:0.02325)90:0.02946,(((576962:0.11188,202459:0.09918)90:0.033,(213358:0.0989,(3390949:0.09853,3726184:0.09836)90:0.03298)90:0.02315)20:0.01425,202949:0.15903)0:0.01188)20:0.02709)10:0.01609,((4323100:0.0982,4409929:0.10612)60:0.01386,((696036:0.11283,(203529:0.18615,202449:0.08377)10:0.02209)30:0.02916,((2361381:0.18808,203220:0.10905)100:0.04166,(4363260:0.07208,(3779572:0.04977,114015:0.13268)70:0.02151)70:0.01055)100:0.04229)0:0.01717)0:0.01634)0:0.00519,(539547:0.12233,(4409453:0.14784,(4363259:0.05689,((268769:0.0594,266521:0.05311)100:0.04977,(182569:0.10314,4463866:0.07165)70:0.01505)100:0.04024)80:0.01602)100:0.05088)20:0.02162)0:0.0112,((573196:0.11279,((((3825327:0.11767,4298210:0.09472)100:0.07495,(836195:0.11165,801940:0.09002)100:0.02232)90:0.0347,((1928988:0.1129,(1129716:0.13293,2909029:0.13959)50:0.01858)70:0.02572,(((815912:0.12176,((219508:0.13512,(426860:0.12643,(202758:0.04748,4344033:0.03692)100:0.11429)90:0.0487)20:0.00791,((823117:0.10669,823009:0.0888)90:0.0381,3770699:0.24911)50:0.02136)40:0.02309)30:0.01326,(4455990:0.05381,(1128285:0.06585,4271527:0.03794)70:0.02727)100:0.06911)10:0.01546,4097115:0.09311)30:0.02142)20:0.01039)20:0.02855,(729293:0.18117,3871866:0.11553)90:0.03599)100:0.15854)20:0.02836,150700:0.13922)20:0.02787)0:0.00717)0:0.00859)100;'
        
        old_tree = Tree.get(schema='newick', data=tree67)
        tree_to_reroot = Tree.get(schema='newick', data=tree70)
        new_tree = Rerooter().reroot_by_tree(
            old_tree,
            tree_to_reroot)
        
        expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
        expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()
        for tip in expected_lefts:
            self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[1].leaf_nodes()])
        for tip in expected_rights:
            self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[0].leaf_nodes()])
        self.assertEqual(len(tree_to_reroot.leaf_nodes()), len(new_tree.leaf_nodes()))
Exemple #50
0
def summary_to_nw_str(mcmc_tree_filename):
    """Return the summary tree of an MCMC run as a newick string.

    ``calc_summary_tree`` is run on ``mcmc_tree_filename`` first; the nexus
    file ``<mcmc_tree_filename>_summary.tree`` is then read with a
    case-sensitive taxon namespace and serialised as newick without
    annotations. The first five characters and any trailing newlines of that
    serialisation are removed.
    """
    calc_summary_tree(mcmc_tree_filename)
    # convert summary nexus tree to newick for ete3
    namespace = TaxonNamespace(is_case_sensitive=True)
    summary_path = mcmc_tree_filename + "_summary.tree"
    summary_tree = Tree.get(
        path=summary_path,
        schema="nexus",
        taxon_namespace=namespace,
        case_sensitive_taxon_labels=True,
        suppress_internal_node_taxa=False)

    # drop all annotations and illegal characters
    # (the 5-character prefix is presumably the rooting token -- confirm)
    newick = summary_tree.as_string('newick', suppress_annotations=True)
    return newick[5:].rstrip("\n")
Exemple #51
0
 def bipartition_by_root(self):
     """Split the tree in two by cutting off the root's first child.

     Returns ``(None, None, None)`` when the tree has a single leaf.
     Otherwise the first child of the seed node is pruned from the wrapped
     tree (modifying it in place) and a tuple ``(t1, t2, root)`` is returned:
     ``t1`` wraps the remaining tree, ``t2`` wraps a new tree built from the
     pruned child, and ``root`` is the original seed node.
     """
     if self.n_leaves == 1:
         return (None, None, None)
     tree = self._tree
     root = tree.seed_node
     first_child = root._child_nodes[0]
     tree.prune_subtree(first_child, update_splits=True, delete_outdegree_one=True)
     remainder = PhylogeneticTree(tree)
     pruned = PhylogeneticTree(Tree(first_child))
     # Reroot only if the pruned part has more than one leaf
     if pruned.n_leaves > 1:
         pruned._tree.reroot_at_node(first_child)
     return remainder, pruned, root
Exemple #52
0
    def getBalancedTreeByHeight(self, size, th):
        """Build a derooted tree from the string made by ``buildBalancedString``.

        The number of levels is ``int(log2(size))`` and ``th`` is divided
        evenly across them to give the per-level unit passed to
        ``buildBalancedString``. ``self.taxCtr`` is reset to 0 and
        ``self.bal_newick`` is rebuilt (and terminated with ';') as a side
        effect.
        """
        levels = int(math.log(size, 2))
        unit = (th * 1.0) / levels

        self.taxCtr = 0
        self.bal_newick = ''
        self.buildBalancedString(1, levels, unit)
        self.bal_newick += ';'

        balanced = Tree.get_from_string(self.bal_newick, "newick")
        balanced.deroot()
        return balanced
Exemple #53
0
def sample_with_outgroups(a_tree, n_ingroups, n_outgroups=1, n_reps=1):
    """Draw ``n_reps`` pruned samples from ``a_tree``.

    Each replicate is a new ``Tree(a_tree)`` passed to ``sample_and_prune``
    with the requested numbers of ingroup and outgroup taxa.

    Returns ``(True, samples)`` when every replicate succeeded, or
    ``(False, samples)`` with the samples collected so far as soon as one
    replicate fails. Each sample is a tuple ``(tree, ingroups, outgroups)``.
    """
    samples = []
    for _ in range(n_reps):
        replicate = Tree(a_tree)
        ok, ingroups, outgroups = sample_and_prune(
            replicate, n_ingroups, n_outgroups=n_outgroups)
        if not ok:
            return False, samples
        samples.append((replicate, ingroups, outgroups))

    return True, samples
Exemple #54
0
    def get_dendropy_tree_from_break_tree(self, break_tree):
        """Assemble a derooted tree from the leaf sets of ``break_tree``.

        Leaf sets are processed by increasing size, for sizes below the
        maximum reported by ``_sort_break_tree_leaves_sets``. For each set the
        current topmost ancestors of its leaves are collected; when there is
        more than one they are attached to a new common parent node. Finally
        all remaining topmost ancestors are joined under one seed node (or the
        single one is used as the seed) and the tree is derooted.
        """
        nodes = self._extract_break_tree_leaves(break_tree)
        leaves_sets_by_size, largest_size = self._sort_break_tree_leaves_sets(break_tree)

        for size in range(largest_size):
            if size not in leaves_sets_by_size:
                continue

            for leaves in leaves_sets_by_size[size]:
                subtree_roots = []
                for leaf in leaves:
                    matching = self._find_node_with_same_taxon(nodes, leaf)
                    top = self._get_oldest_parent(matching)
                    if top not in subtree_roots:
                        subtree_roots.append(top)

                joint_parent = Node()
                if len(subtree_roots) > 1:
                    for top in subtree_roots:
                        top.parent_node = joint_parent

        remaining_roots = []
        for node in nodes:
            top = self._get_oldest_parent(node)
            if top not in remaining_roots:
                remaining_roots.append(top)

        if len(remaining_roots) == 1:
            seed = remaining_roots[0]
        else:
            # Several roots are joined under a fresh seed; with no roots at
            # all the fresh seed stays childless.
            seed = Node()
            for top in remaining_roots:
                top.parent_node = seed

        tree = Tree(seed_node=seed)
        tree.deroot()
        return tree
Exemple #55
0
def generate_ATT_from_phylesystem(aln,
                                  workdir,
                                  study_id,
                                  tree_id,
                                  phylesystem_loc='api'):
    """Combine an alignment with a study tree into an AlignTreeTax object.

    The tree and OTU information are looked up by ``study_id`` and
    ``tree_id``; taxon labels in both the alignment and the tree are forced
    to OTU ids.

    Parameters
    ----------
    aln : must be a ``DnaCharacterMatrix`` (checked with an assert). Its
        taxon labels are modified in place: spaces become underscores, then
        labels matching an original tree label are replaced by the OTU id.
    workdir : passed through to ``AlignTreeTax``.
    study_id : study identifier handed to ``get_nexson``.
    tree_id : identifier of the tree inside that study.
    phylesystem_loc : handed to ``get_nexson``; defaults to ``'api'``.

    Returns
    -------
    ``AlignTreeTax`` built from the relabelled newick string, the OTU
    dictionary, the alignment, the ingroup MRCA and ``workdir``.

    Notes
    -----
    A taxon whose label has no matching OTU only triggers a message on
    stderr; this function does not itself remove it from the alignment.
    """
    #TODO CHECK ARGS
    assert isinstance(aln, datamodel.charmatrixmodel.DnaCharacterMatrix)

    # Spaces are replaced on the alignment side first so that labels can
    # match the tree's tip labels, which get the same treatment below.
    for taxon in aln.taxon_namespace:
        taxon.label = taxon.label.replace(" ", "_")

    nexson = get_nexson(study_id, phylesystem_loc)
    ingroup_ott_ids = get_subtree_otus(nexson,
                                       tree_id=tree_id,
                                       subtree_id="ingroup",
                                       return_format="ottid")
    ott_mrca = get_mrca_ott(ingroup_ott_ids)

    newick_schema = PhyloSchema('newick',
                                output_nexml2json='1.2.1',
                                content="tree",
                                tip_label="ot:originalLabel")
    # Heavy handed: every space in the newick string becomes an underscore.
    newick = extract_tree(nexson, tree_id, newick_schema).replace(" ", "_")

    # The tree shares the alignment's taxon namespace, so relabelling the
    # namespace further down also changes the tip labels written out.
    tre = Tree.get(data=newick,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)

    otu_dict = {}
    orig_lab_to_otu = {}
    treed_taxa = {}  # filled below but not read again in this function
    for otu_id in get_subtree_otus(nexson, tree_id=tree_id):
        otu = extract_otu_nexson(nexson, otu_id)[otu_id]
        otu_dict[otu_id] = otu
        otu['^physcraper:status'] = "original"
        otu['^physcraper:last_blasted'] = "1900/01/01"
        original_label = otu.get(u'^ot:originalLabel').replace(" ", "_")
        orig_lab_to_otu[original_label] = otu_id
        treed_taxa[original_label] = otu.get(u'^ot:ottId')

    for taxon in aln.taxon_namespace:
        try:
            taxon.label = orig_lab_to_otu[taxon.label].encode('ascii')
        except KeyError:
            sys.stderr.write("{} doesn't have an otu id. It is being removed from the alignement. This may indicate a mismatch between tree and alignement\n".format(taxon.label))

    # TODO: need to prune tree to seqs and seqs to tree...
    otu_newick = tre.as_string(schema="newick")
    # newick should be bare, but alignment should be DnaCharacterMatrix
    return AlignTreeTax(otu_newick,
                        otu_dict,
                        aln,
                        ingroup_mrca=ott_mrca,
                        workdir=workdir)
def returnRootOfTree(infile, filePrefix, ext):
    '''
    Return the node string of the root of a newick tree.

    input:
        infile: path whose containing directory (after resolving with
            os.path.realpath) is used as the tree's directory
        filePrefix, ext: the tree is read from
            <directory>/<filePrefix>.<ext>
    return: get_node_str() of the first internal node found at level 0,
        or '' when no internal node is at level 0
    '''
    tree_dir = os.path.dirname(os.path.realpath(infile))
    tree_file = '/'.join((tree_dir, filePrefix + '.' + ext))
    parsed = Tree.get_from_path(tree_file,
                                'newick',
                                annotations_as_nhx=True,
                                extract_comment_metadata=True,
                                suppress_annotations=False)
    # Lazily scan internal nodes and stop at the first one sitting at level 0.
    top_level = (node.get_node_str()
                 for node in parsed.internal_nodes()
                 if node.level() == 0)
    return next(top_level, '')
Exemple #57
0
 def test_input_unrooted_tree(self):
     """Build a package from an unrooted tree and check its reference tree.

     Runs ``Create(prerequisites).main`` with the 61-OTU data set from which
     sequence 4459468 was pruned, then reads the first line of the created
     package's reference tree and expects 21 leaves.
     """
     otu61 = os.path.join(path_to_data, '61_otus.gpkg','61_otus.refpkg')
     # NOTE(review): bad_alignment is never used in this test; the temporary
     # file is kept so that the test's setup is unchanged.
     with tempfile.NamedTemporaryFile(suffix='.fa') as bad_alignment:
         with tempdir.TempDir() as tmp:
             Create(prerequisites).main(
                 taxtastic_taxonomy=os.path.join(otu61,'61_otus_taxonomy.csv'),
                 taxtastic_seqinfo=os.path.join(otu61,'61_otus_seqinfo.csv'),
                 # created with newick_utils:
                 # nw_prune test/data/61_otus.gpkg/61_otus.refpkg/61_otus.tre 4459468 >test/data/61_otus.without_4459468.tre
                 unrooted_tree=os.path.join(path_to_data,'create','61_otus.without_4459468.tre'),
                 sequences=os.path.join(path_to_data,'create','61_otus.without_4459468.fasta'),
                 alignment=os.path.join(path_to_data,'create','61_otus.without_4459468.aln.fasta'),
                 prefix=tmp, force=True)
             gpkg = GraftMPackage.acquire(tmp)
             # Close the tree file deterministically instead of leaking the
             # handle until garbage collection.
             with open(gpkg.reference_package_tree_path()) as tree_file:
                 tree = Tree.get(schema='newick', data=tree_file.readline())
             self.assertEqual(21, len(tree.leaf_nodes()))
def read_matrix_and_tree(char_file_path,
                         tree_file_path,
                         char_type=DnaCharacterMatrix,
                         char_schema='fasta',
                         tree_schema='newick'):
    """Read an optional character matrix and a tree.

    When ``char_file_path`` is truthy, the matrix is read with
    ``char_type.get`` using ``char_schema``, its taxon namespace is marked
    ``is_mutable = False`` and that namespace is handed to the tree reader.
    Otherwise no matrix is read and the tree is read with
    ``taxon_namespace=None``.

    Returns a ``(matrix, tree)`` pair; ``matrix`` is ``None`` when no
    character file path was given.
    """
    matrix = None
    namespace = None
    if char_file_path:
        matrix = char_type.get(path=char_file_path, schema=char_schema)
        namespace = matrix.taxon_namespace
        # Frozen before the tree is read -- presumably so the tree cannot
        # add taxa missing from the matrix; confirm against DendroPy docs.
        namespace.is_mutable = False

    tree = Tree.get(path=tree_file_path,
                    schema=tree_schema,
                    preserve_underscores=True,
                    taxon_namespace=namespace)
    return matrix, tree