Exemple #1
0
def get_example_tree():
    """Build a demo tree by reconciling a fixed gene tree with a species tree.

    Both trees are hard-coded newick strings. The gene tree is reconciled
    against the species tree, the reconciled tree is linked to ``alg`` (a name
    that must be provided by the enclosing module) and returned together with
    a default ``TreeStyle``. The event list produced by the reconciliation is
    discarded.
    """
    species_newick = "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);"
    gene_newick = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'

    gene_tree = PhyloTree(gene_newick)
    species_tree = PhyloTree(species_newick)

    # reconcile() hands back (reconciled tree, events); only the tree is used.
    reconciled, _events = gene_tree.reconcile(species_tree)
    reconciled.link_to_alignment(alg)
    return reconciled, TreeStyle()
Exemple #2
0
def load_tree_sequences(nwk_file, fasta_file):
    '''
    Return a PhyloTree read from `nwk_file` with `fasta_file` linked to it.

    The tree is parsed with newick ``format=1`` and the alignment is attached
    through ``link_to_alignment`` using the 'fasta' alignment format.
    '''
    phylo = PhyloTree(newick=nwk_file, format=1)
    phylo.link_to_alignment(alignment=fasta_file, alg_format='fasta')
    return phylo
Exemple #3
0
def run(args):
    """Print the in/out sequence sets of every evolutionary event of each tree.

    Trees are taken from ``args.src_tree_iterator``. Nothing is printed for a
    tree unless ``args.orthologs`` is set (i.e. is not None).
    """
    from ete2 import Tree, PhyloTree
    for newick in args.src_tree_iterator:
        if args.orthologs is None:
            continue
        for event in PhyloTree(newick).get_descendant_evol_events():
            # One line per event: "<in_seqs> <out_seqs>".
            print("%s %s" % (event.in_seqs, event.out_seqs))
Exemple #4
0
    def get_tree(self, protid, method, phylome_id):
        """ Returns the method-tree associated to a given protid.

        The first three characters of `protid` are used as the species code
        and the remainder as the protein identifier in the query.

        Returns a ``(tree, likelihood)`` pair, or ``(None, None)`` when the
        query reports no matching row.
        """
        species_code, protein_number = protid[:3], protid[3:]
        query = 'SELECT newick,lk FROM %s WHERE phylome_id=%s AND species="%s" AND protid="%s" AND method ="%s"' % (
            self._trees_table, phylome_id, species_code, protein_number, method)

        # A falsy result from execute() means there is nothing to fetch.
        if not self._SQL.execute(query):
            return None, None

        row = self._SQL.fetchone()
        likelihood = float(row[1])
        return PhyloTree(row[0]), likelihood
def integrate_pwids_into_tree(tree, alignment):
    '''Load `tree` with its FASTA `alignment` as a PhyloTree and annotate every node.

    Each node reached by traverse() receives:
      node_kerf_name     -- "node" followed by the 3-digit, zero-padded traversal index
      kerf_pass, sh_pass -- False (placeholders for the later kerf and sh functions)
      min_pwid           -- 1.0 for a leaf, otherwise the value returned by
                            get_min_pwid_of_leaves() for the leaves under the node
    The annotated PhyloTree is returned.'''
    annotated = PhyloTree(tree, alignment=alignment, alg_format="fasta")
    for position, current in enumerate(annotated.traverse()):
        current.node_kerf_name = 'node' + str(position).zfill(3)
        current.kerf_pass = False
        current.sh_pass = False
        if current.is_leaf():
            current.min_pwid = 1.0
        else:
            current.min_pwid = get_min_pwid_of_leaves(current.get_leaves())
    return annotated
Exemple #6
0
def get_topology(taxids, intermediate_nodes=False, rank_limit=None):
    """Return the pruned taxonomy tree that connects all the given taxids.

    :param taxids: iterable of taxids; each one is resolved to its lineage
        with get_sp_lineage() and ranked with get_ranks().
    :param False intermediate_nodes: if False, nodes with exactly one child
        whose taxid was not requested are removed from the result.
    :param None rank_limit: if given, each lineage is cut right after the
        first node whose rank equals this value.

    :returns: the node for taxid 1, or its only child (detached) when it has
        exactly one child.
    :raises KeyError: if no lineage contains taxid 1 (e.g. `taxids` is empty).
    """
    from ete2 import PhyloTree
    sp2track = {}
    elem2node = {}
    for sp in taxids:
        track = deque()
        lineage = get_sp_lineage(sp)
        id2rank = get_ranks(lineage)

        for elem in lineage:
            node = elem2node.get(elem)
            if node is None:
                # Create the node only on first sight; lineages share ancestors,
                # so most lookups are hits and need no new object.
                node = elem2node[elem] = PhyloTree()
                node.name = str(elem)
                node.add_feature("rank", str(id2rank.get(int(elem), "?")))
            track.append(node)
        sp2track[sp] = track

    # Requested taxids as a set: constant-time membership while pruning.
    requested = set(sp2track)

    # generate parent child relationships
    for track in sp2track.values():
        parent = None
        for elem in track:
            if parent and elem not in parent.children:
                parent.add_child(elem)
            if rank_limit and elem.rank == rank_limit:
                break
            parent = elem
    root = elem2node[1]

    # NOTE: requested taxids that end up as internal nodes are not turned
    # into extra leaves here.

    # remove one-child nodes that were not explicitly requested
    if not intermediate_nodes:
        for n in root.get_descendants():
            if len(n.children) == 1 and int(n.name) not in requested:
                n.delete(prevent_nondicotomic=False)

    if len(root.children) == 1:
        return root.children[0].detach()
    else:
        return root
Exemple #7
0
def main():
    """Print how many leaves each species has in the tree file given as argv[1].

    Only the first line of the file is read and parsed as newick. Species
    codes are derived from leaf names with ``_get_spcode``. Output is one
    "<species>\\t<count>" line per species, most frequent first.
    """
    fn = sys.argv[1]
    with open(fn) as handle:
        nw = handle.readline()

    t = PhyloTree(nw)

    #set species naming function
    t.set_species_naming_function(_get_spcode)

    species = {}
    for leaf in t.get_leaves():
        spCode = leaf.species
        species[spCode] = species.get(spCode, 0) + 1

    for spCode in sorted(species, key=species.get, reverse=True):
        print('%s\t%s' % (spCode, species[spCode]))
Exemple #8
0
def phylomedump_tree_iterator( tarfn,verbose=0 ):
    """PhylomeDB all_trees.tar.gz dump treeobj generator.

    Members are processed in archive order:

    * ``*.nw``: the first line is parsed into a PhyloTree, which is tagged
      with ``seedid`` and ``method`` taken from the first two dot-separated
      parts of the file name (Phy000CWA9_YEAST.JTT.nw --> Phy000CWA9_YEAST, JTT).
    * ``*.lk``: the first line is split on tabs into seedid, method and
      likelihood. The likelihood is stored on the most recently parsed tree,
      which is yielded if the likelihood is non-zero and seedid/method match.

    An ``*.lk`` member that appears before any ``*.nw`` member is reported on
    stderr and skipped. The archive is closed when the generator finishes or
    is closed.

    :param tarfn: path of the tar archive; opened as gzip if it ends in ".gz".
    :param verbose: if true, progress and a final summary go to stderr.
    """
    mode = "r:gz" if tarfn.endswith(".gz") else "r"
    tar = tarfile.open(tarfn, mode)

    t = None    # most recently parsed tree, waiting for its .lk entry
    i = k = 0   # trees parsed / trees yielded
    try:
        for m in tar:
            if not m.isfile():
                continue
            if m.name.endswith(".nw"):
                i += 1
                nw = tar.extractfile(m).readline()
                t  = PhyloTree(nw)
                # add seedid and method info from the file name
                seedid, method = os.path.basename(m.name).split(".")[:2]
                t.seedid = seedid
                t.method = method
            elif m.name.endswith(".lk"):
                if t is None:
                    # No tree has been parsed yet, so there is nothing to annotate.
                    sys.stderr.write( " Err: Likelihood entry without a preceding tree: %s\n" % m.name)
                    continue
                seedid, method, lk = tar.extractfile(m).readline().split('\t')[:3]
                t.lk = float(lk)
                if not t.lk:
                    sys.stderr.write( " Err: Zero likelihood (%s) for: %s\n" % (t.lk, ", ".join((t.seedid, t.method))))
                    continue
                if seedid!=t.seedid or t.method != method:
                    sys.stderr.write( " Err: Seedid and/or method doesn't match: %s\n" % ", ".join((seedid, t.seedid, method, t.method)))
                    continue
                k += 1
                if verbose and not i%100:
                    sys.stderr.write( "  %6i\r" % i )

                yield t
    finally:
        tar.close()
    if verbose:
        sys.stderr.write( " %s out of %s trees succesfully parsed [memory: %s KB]\n" % (k, i, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
Exemple #9
0
    def get_best_tree(self, protid, phylome_id):
        """ Returns the winner ML tree

        Every tree row stored for `protid` (species = first three characters,
        protein id = the rest) in `phylome_id` is fetched. Rows whose
        likelihood is not negative are ignored; among the others the one with
        the highest likelihood wins.

        Returns ``(winner_method, {method: likelihood}, tree)``; the method
        and the tree are None when no row qualifies.
        """
        query = 'SELECT newick,method,lk FROM %s WHERE phylome_id=%s AND species="%s" and protid="%s";' % (
            self._trees_table, phylome_id, protid[:3], protid[3:])
        self._SQL.execute(query)

        likelihoods = {}
        best = None  # (likelihood, method, newick) of the current winner
        for newick, model, lk in self._SQL.fetchall():
            if not lk < 0:
                continue
            likelihoods[model] = lk
            if best is None or lk > best[0]:
                best = (lk, model, newick)

        winner_model = winner_newick = None
        if best is not None:
            _, winner_model, winner_newick = best

        # An empty newick string yields no tree even though a method won.
        tree = PhyloTree(winner_newick) if winner_newick else None
        return winner_model, likelihoods, tree
Exemple #10
0
def build_hmm_from_tree(tree_name, aln_name, msa_dir, hmm_dir):
    '''
    Reads tree and corresponding msa and create an MSA & HMM for each internal node.

    Internal nodes are numbered node0, node1, ... in traversal order and get
    that name stored in their ``hmm`` feature. For each one, the sequences of
    the leaves below it are written in FASTA layout to
    ``<msa_dir><node>.aln`` and ``build_hmmer3_hmm_from_alignment.py`` is run
    on that file with ``--name <hmm_dir><node>``. Finally every
    ``<hmm_dir>*.hmm`` is concatenated into ``<hmm_dir>concat.hmm``.

    `msa_dir` and `hmm_dir` are used as plain string prefixes, so they need
    their own trailing path separator.

    Returns the annotated PhyloTree. Raises CalledProcessError (from
    check_call) if the HMM build script exits with a non-zero status.
    '''

    # Annotate internal nodes with name of corresponding HMM.
    pt = PhyloTree(tree_name, alignment=aln_name, alg_format="fasta")
    internal_nodes = (node for node in pt.traverse() if not node.is_leaf())
    for i_node, node in enumerate(internal_nodes):
        node_name = 'node%s' % i_node
        node.add_features(hmm=node_name)

        # make msa for node
        msa_lines = []
        for leaf in node.iter_leaves():
            msa_lines.append(">%s" % leaf.name)
            msa_lines.append(str(leaf.sequence))
        msa_path = '%s%s.aln' % (msa_dir, node_name)
        # The context manager closes the file even if the write fails.
        with open(msa_path, 'w') as msa:
            msa.write('\n'.join(msa_lines))

        # build HMM for node
        check_call([
            'build_hmmer3_hmm_from_alignment.py', '--name',
            '%s%s' % (hmm_dir, node_name),
            msa_path
        ])

    #concatenate HMMs into one file for Hmmscan
    os.system('cat %s*.hmm > %sconcat.hmm' % (hmm_dir, hmm_dir))
    return pt
Exemple #11
0
    def get_topology(self, taxids, intermediate_nodes=False, rank_limit=None, collapse_subspecies=False):
        """Given a list of taxid numbers, return the minimal pruned NCBI taxonomy tree
        containing all of them.

        :param False intermediate_nodes: If True, single child nodes
        representing the complete lineage of leaf nodes are kept. Otherwise, the
        tree is pruned to contain the first common ancestor of each group.

        :param None rank_limit: If valid NCBI rank name is provided, the tree is
        pruned at that given level. For instance, use rank="species" to get rid
        of sub-species or strain leaf nodes.

        :param False collapse_subspecies: If True, every requested node of rank
        "species" that still has descendants gets all of them re-attached as
        its direct children (each renamed with a "{<rank>}" suffix) plus an
        extra leaf copy of itself named "<name>{species}". Such species nodes
        are flagged with the feature collapse_subspecies="1".

        Raises KeyError if no lineage contains taxid 1 (e.g. empty `taxids`).
        """
        from ete2 import PhyloTree
        sp2track = {}
        elem2node = {}
        for sp in taxids:
            track = deque()
            lineage = self.get_sp_lineage(sp)
            id2rank = self.get_ranks(lineage)

            for elem in lineage:
                node = elem2node.get(elem)
                if node is None:
                    # Create the node only on first sight; lineages share
                    # ancestors, so most lookups need no new object.
                    node = elem2node[elem] = PhyloTree()
                    node.name = str(elem)
                    node.add_feature("rank", str(id2rank.get(int(elem), "?")))
                track.append(node)
            sp2track[sp] = track

        # Requested taxids as a set: constant-time membership below.
        requested = set(sp2track)

        # generate parent child relationships
        for track in sp2track.values():
            parent = None
            for elem in track:
                if parent and elem not in parent.children:
                    parent.add_child(elem)
                if rank_limit and elem.rank == rank_limit:
                    break
                parent = elem
        root = elem2node[1]

        #remove onechild-nodes
        if not intermediate_nodes:
            for n in root.get_descendants():
                if len(n.children) == 1 and int(n.name) not in requested:
                    n.delete(prevent_nondicotomic=False)

        if collapse_subspecies:
            # Nodes built above carry their taxid in `name` and no `taxid`
            # attribute, so the species filter reads the name and walks the
            # tree rooted at `root`.
            # NOTE(review): assumes the filter is meant to select species that
            # were explicitly requested -- confirm.
            species_nodes = [n for n in root.traverse()
                             if n.rank == "species" and int(n.name) in requested]
            for sp_node in species_nodes:
                bellow = sp_node.get_descendants()
                if bellow:
                    # creates a copy of the species node
                    connector = sp_node.__class__()
                    for f in sp_node.features:
                        connector.add_feature(f, getattr(sp_node, f))
                    connector.name = connector.name + "{species}"
                    for n in bellow:
                        n.detach()
                        n.name = n.name + "{%s}" %n.rank
                        sp_node.add_child(n)
                    sp_node.add_child(connector)
                    sp_node.add_feature("collapse_subspecies", "1")

        if len(root.children) == 1:
            return root.children[0].detach()
        else:
            return root
Exemple #12
0
__author__ = 'mjohnpayne'

import sys
from ete2 import Tree, faces, AttrFace, TreeStyle, NodeStyle, PhyloTree, PieChartFace
import math

# infile = open('/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree.nwk','r')
# outfile = open('/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree_nos_only.nwk','w')
# infile = infile.read()

# Tree whose node names are node-assignment numbers (newick format=1).
tree_path = '/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree.nwk'
t = PhyloTree(tree_path, format=1)

# Print the name of every node yielded by iterating the tree object
# (presumably its leaves -- confirm against the ETE documentation) and give
# node "41" a fixed branch length.
for node in t:
    print(node.name)
    if node.name == "41":
        node.dist = 5e-05

# Optional export, currently disabled:
# t.write(outfile='/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree_nos.nwk',format=1,)

# Show the tree with the default style.
ts = TreeStyle()
t.show(tree_style=ts)
Exemple #13
0
    "delta.plot": "delta_plot",
    "dist.dna": "dist_dna",
    "dist.nodes": "dist_nodes",
    "node.depth": "node_depth",
    "node.depth.edgelength": "node_depth_edgelength",
    "node.height": "node_height",
    "node.height.clado": "node_height_clado",
    "prop.part": "prop_part"
}

# R packages loaded through importr.
ape = importr("ape", robject_translations=ape_objects)  # Required for phangorn
ph = importr("phangorn")  # Phylogenetic operations in R

print("All modules imported successfully")

# Main tree containing entire sequence
t = PhyloTree(intree, alignment=seq, alg_format=seq_format)
# Dummy tree for phenotype shuffling
dtp = PhyloTree(intree)

print("Tree file read successfully")

# Read the phenotype file into [first column, second column] pairs,
# both stripped of surrounding whitespace.
phenlist = []
phenfile = open(phen, "r")  # Phenotype file
for line in phenfile:
    columns = line.split("\t")
    phenlist.append([columns[0].strip(), columns[1].strip()])
phenfile.close()

# Dictionary containing species names and their phenotype values
phenotype = {}

# Phenotype file should have two columns separated by tab containing taxa name
# in the first column and a numerical phenotype value in the second
Exemple #14
0
                               ["Green", "Red"])
            pie.opacity = 0.5
            #faces.add_face_to_node(name_face, node, column=0, position="branch-right")
            faces.add_face_to_node(pie, node, column=0, position="float")


# Rendering style: leaf names are not added automatically; drawing is
# delegated to the custom layout function.
ts = TreeStyle()
ts.layout_fn = my_layout
ts.show_leaf_name = False

#t = PhyloTree('/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree_nos.nwk', format=1)

# Branch-numbered tree (newick format=1).
tree_nwk = '/Volumes/MP_HD/CI_GENOME_SEQ/CI_orthomcl_data/gain_loss_tree_frm_orthogroups/CI_badirate_branch_no_tree_no_names.nwk'
t = PhyloTree(tree_nwk, format=1)

# Earlier strain orderings and numberings, kept for reference:
#dataorder = ['FRR2161','FRR3841','FRR4059','FRR3840','F4','BR2SD2','BR2','BR2SD1','G09043','G11702','G11203SD4','G11203SD3','G11203','G11203SD1','G09027SD2','G09027SD1','G09027','FRR3871','FRR3482','HR2','G11012']
#nos = ["1",'2','4','5','7','10','11','13','15','17','19','20','22','24','26','27','29','32','33','35','37']

#dataorder = ['FRR2161','FRR3841','FRR3840','FRR4059','F4','BR2SD2','BR2','BR2SD1','G09043','G11702','G11203SD4','G11203SD3','G11203','G11203SD1','G09027SD2','G09027SD1','G09027','FRR3871','FRR3482','HR2','G11012']
#nos = ["1",'2','4','5','7','10','11','13','15','17','19','20','22','24','26','27','29','32','33','35','37']

#branch_to_node = {24:25,39:38,41:43,33:35,5:5,31:0,18:22,14:16,40:42,42:41,35:36,36:32,27:30,15:2,26:29,12:15,29:26,21:19,11:4,32:34,6:11,17:20,22:17,16:18,13:3,34:33,43:37,3:6,7:13,8:14,37:39,10:10,44:31,30:24,20:21,2:8,1:7,38:40,28:28,4:9,25:27,19:23,23:1,9:12}
#print branch_to_node

# Gain/loss table, opened for reading; the handle is left open for the code
# that follows.
changes_table = '/Volumes/MP_HD/CI_GENOME_SEQ/CI_orthomcl_data/gain_loss_tree_frm_orthogroups/CI_denovo_gene_gain_loss_table.txt'
inchanges = open(changes_table, 'r')
Exemple #15
0
def run(args):
    """Annotate each source tree with features read from tab-separated files and dump it.

    For every newick in ``args.src_tree_iterator`` a tree is built (a
    ``PhyloTree`` annotated with NCBI taxonomy when ``args.ncbi`` is set, a
    plain ``Tree`` otherwise). Each entry of ``args.feature`` is a list of
    "key:value" fields with keys ``name``, ``source``, ``multiple`` and
    ``type`` (one of str, int, float, set, list). ``source`` is a file whose
    non-empty, non-comment lines look like
    "<node>[,<node>...]<TAB><value>"; the value is cast and stored under
    ``name`` on the named node, or on the common ancestor when several node
    names are listed.

    Raises ValueError for a malformed or unknown field, an unknown type, or an
    annotation lacking ``name`` or ``source``.
    """
    from ete2 import Tree, PhyloTree

    # Loop-invariant lookup table for the "type" option.
    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            # amultiple is parsed for the "multiple" option but not used yet.
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    key, value = [strip(part) for part in field.split(":")]
                except Exception:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    #append
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both values are needed below: the name to store the feature
            # under and the file to read it from.
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % annotation)

            features.add(aname)
            with open(asource, 'rU') as source:
                for line in source:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = [strip(part) for part in line.split('\t')]
                    nodenames = [strip(part) for part in nodenames.split(',')]
                    # A leading "!" on the first node name turns relaxed
                    # grouping off; the flag is not acted upon yet.
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass
                            # do something
                    else:
                        target_node = tree & nodenames[0]

                    if hasattr(target_node, aname):
                        # NOTE(review): the message says "Overwriting" but the
                        # existing value is kept -- confirm intended behaviour.
                        log.warning('Overwriting annotation for node" [%s]"' %
                                    nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
Exemple #16
0
 def __init__(self):
     """Start with an empty taxonomy lookup and an empty tree named "NoName"."""
     placeholder = PhyloTree()
     placeholder.name = "NoName"
     self.tree = placeholder
     self.taxoDB = dict()
Exemple #17
0
            pie = PieChartFace([changes[node.name][0], changes[node.name][1]],
                               changes[node.name][2], changes[node.name][2],
                               ["Green", "Red"])
            pie.opacity = 0.5
            #faces.add_face_to_node(name_face, node, column=0, position="branch-right")
            faces.add_face_to_node(pie, node, column=0, position="float")


# Rendering style: leaf names are not added automatically; drawing is
# delegated to the custom layout function.
ts = TreeStyle()
ts.layout_fn = my_layout
ts.show_leaf_name = False

# Node-numbered tree (newick format=1).
tree_nwk = '/Volumes/MP_HD/Pm_Ts_Tf_Pf_comparison/4_spec_gene_gain_loss/eurot_gene_gain_loss/node_assignments_tree_nos_only_dbl.nwk'
t = PhyloTree(tree_nwk, format=1)

# Species labels and node numbers; the two lists have the same length and
# are presumably parallel (label i belongs to number i) -- confirm.
dataorder = [
    'A. fumigatus',
    'N. fisheri',
    'A. clavatus',
    'A. terreus',
    'A. flavus',
    'A. oryzae',
    'A. niger',
    'A. nidulans',
    'P. decumbens',
    'P. roquefortii',
    'P. chrysogenum',
    'P. digitatum',
    'T. stipitatus',
    'P. funiculosum',
    'T. marneffei',
    'T. flavus',
    'A. dermatiditis',
    'H. capsulatum',
    'P. brasiliensis',
    'C. immitis',
    'U. reesei',
    'T. equinum',
    'T. tonsurans',
]
nos = (
    "1 2 4 6 7 8 12 14 16 17 18 19 24 "
    "25 26 27 32 33 35 37 38 40 41"
).split()

branch_to_node = {
Exemple #18
0
    def get_topology(self,
                     taxids,
                     intermediate_nodes=False,
                     rank_limit=None,
                     collapse_subspecies=False,
                     annotate=True):
        """Build the minimal pruned NCBI taxonomy tree spanning `taxids`.

        :param False intermediate_nodes: When True, single-child nodes along
        the lineage of each leaf are kept. When False, single-child nodes
        whose taxid was not requested are removed.

        :param None rank_limit: When a rank name is given, each lineage is cut
        right after the first node of that rank (e.g. "species" drops
        sub-species and strain nodes).

        :param False collapse_subspecies: When True, the children of every
        node of rank "species" are detached from the result.

        :param True annotate: When True, the result is passed to
        self.annotate_tree() before being returned.

        """
        from ete2 import PhyloTree
        tracks_by_taxid = {}
        node_by_taxid = {}
        for requested in taxids:
            lineage = self.get_lineage(requested)
            rank_of = self.get_rank(lineage)

            path = []
            for member in lineage:
                if member in node_by_taxid:
                    current = node_by_taxid[member]
                else:
                    current = node_by_taxid.setdefault(member, PhyloTree())
                    current.name = str(member)
                    current.taxid = member
                    current.add_feature("rank",
                                        str(rank_of.get(int(member), "no rank")))
                path.append(current)
            tracks_by_taxid[requested] = path

        # Link every lineage from its top down, stopping at rank_limit.
        for path in tracks_by_taxid.values():
            above = None
            for current in path:
                if above and current not in above.children:
                    above.add_child(current)
                if rank_limit and current.rank == rank_limit:
                    break
                above = current
        root = node_by_taxid[1]

        # Drop single-child nodes that were not explicitly requested.
        if not intermediate_nodes:
            for candidate in root.get_descendants():
                if len(candidate.children) == 1 and int(candidate.name) not in taxids:
                    candidate.delete(prevent_nondicotomic=False)

        # A root with a single child is replaced by that child.
        tree = root.children[0].detach() if len(root.children) == 1 else root

        if collapse_subspecies:
            below_species = []
            for visited in tree.traverse():
                if visited.rank == "species":
                    below_species.extend(visited.children)
            for child in below_species:
                child.detach()

        if annotate:
            self.annotate_tree(tree)

        return tree
Exemple #19
0
# Pipeline: for every gene id listed in the file `in_id`, collect BLAST hits,
# align them with megacc, build a tree with megacc and render it to PDF.
# `in_id`, `in_eval`, `indb`, `align_mao`, `align_dir`, `tree_mao`, `tree_dir`,
# `blast_gene` and `sl` are defined outside this section.
ts = TreeStyle()
# ts.mode = "c"

# NOTE(review): the file handle opened here is never explicitly closed.
for i in open(in_id,"r").readlines():
    i=i.strip('\n')
    print i
    # Hits go to <directory of in_id>/top_hits_pm1_madss/<id>_blastp_hits_<eval>.fasta
    outf = "/".join(in_id.split('/')[:-1])+"/top_hits_pm1_madss/"+i+"_blastp_hits_"+in_eval+".fasta"
    # presumably returns the number of hits written to outf -- confirm in blast_gene
    no_hits = blast_gene(i,in_eval,indb,outf)
    print no_hits
    # NOTE(review): commands are built by string concatenation and run with
    # shell=True, so ids and paths must not contain shell metacharacters.
    align_args = "/usr/local/bin/megacc -a "+ align_mao +" -o "+align_dir+" -s -d " + outf
    subprocess.Popen(align_args, shell=True).wait()
    # presumably a 2 second sleep before listing the output -- confirm what `sl` is
    sl(2)
    align_lis = glob.glob(align_dir + "/*.meg")
    # alignpath is assigned but not used in this section
    alignpath = ''
    # Build a tree from every .meg alignment whose path contains the gene id.
    for j in align_lis:
        if i in j:
            tree_args = "/usr/local/bin/megacc -a "+ tree_mao +" -o "+tree_dir+" -d " + j
            subprocess.Popen(tree_args, shell=True).wait()
    tree_ls = glob.glob(tree_dir + "/*.nwk")
    # Render every newick file for this gene id, skipping paths containing "consensus".
    for j in tree_ls:
        if i in j and "consensus" not in j:
            t = PhyloTree(j, format=1)
            #t.show()
            # t = Phylo.read(j,"newick")
            # #t.ladderize()
            # #Phylo.draw(t)
            # Phylo.write(t,j.replace(".nwk",".xml"),"phyloxml")
            # Phylo.draw_graphviz(t,prog="neato")
            # Each matching tree is written to the same PDF path for this id.
            t.render(tree_dir+"/"+i+"_blastp_hits_"+in_eval+".pdf",tree_style=ts,dpi=200)
Exemple #20
0
def run(args):
    if args.text_mode:
        from ete2 import Tree
        for tindex, tfile in enumerate(args.src_tree_iterator):
            #print tfile
            if args.raxml:
                nw = re.sub(":(\d+\.\d+)\[(\d+)\]", ":\\1[&&NHX:support=\\2]", open(tfile).read())
                t = Tree(nw)
            else:
                t = Tree(tfile)
            
            print t.get_ascii(show_internal=args.show_internal_names,
                              attributes=args.show_attributes)
        return
        
    import random
    import re
    import colorsys
    from collections import defaultdict
    from ete2 import (Tree, PhyloTree, TextFace, RectFace, faces, TreeStyle,
                         add_face_to_node, random_color)
    
    global FACES
    
    if args.face:
        FACES = parse_faces(args.face)
    else:
        FACES = []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    
    for f in FACES:
        if f["value"] == "@name":
            ts.show_leaf_name = False
            break

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(parse_faces(
            ['value:@sci_name, size:10, fstyle:italic',
             'value:@taxid, color:grey, size:6, format:" - %s"',
             'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
             'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
         ]))


    if args.alg:
        FACES.extend(parse_faces(
            ['value:@sequence, size:10, pos:aligned, ftype:%s' %args.alg_type]
         ))

    if args.heatmap:
        FACES.extend(parse_faces(
            ['value:@name, size:10, pos:aligned, ftype:heatmap']
         ))
        
    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(parse_faces(
                ['value:@%s, pos:float, ftype:bubble, opacity:0.4' %bubble,
             ]))
                        
    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    ts.layout_fn = lambda x: None
        
    for tindex, tfile in enumerate(args.src_tree_iterator):
        #print tfile
        if args.raxml:
            nw = re.sub(":(\d+\.\d+)\[(\d+)\]", ":\\1[&&NHX:support=\\2]", open(tfile).read())
            t = PhyloTree(nw)
        else:
            t = PhyloTree(tfile)

            
        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        if args.heatmap:
            DEFAULT_COLOR_SATURATION = 0.3
            BASE_LIGHTNESS = 0.7
            def gradient_color(value, max_value, saturation=0.5, hue=0.1):    
                def rgb2hex(rgb):
                    return '#%02x%02x%02x' % rgb
                def hls2hex(h, l, s):
                    return rgb2hex( tuple(map(lambda x: int(x*255), colorsys.hls_to_rgb(h, l, s))))

                lightness = 1 - (value * BASE_LIGHTNESS) / max_value
                return hls2hex(hue, lightness, DEFAULT_COLOR_SATURATION)

            
            heatmap_data = {}
            max_value, min_value = None, None
            for line in open(args.heatmap):
                if line.startswith('#COLNAMES'):
                    pass
                elif line.startswith('#') or not line.strip():
                    pass
                else:
                    fields = line.split('\t')
                    name = fields[0].strip()

                    values = map(lambda x: float(x) if x else None, fields[1:])

                    maxv = max(values)
                    minv = min(values)
                    if max_value is None or maxv > max_value:
                        max_value = maxv
                    if min_value is None or minv < min_value:
                        min_value = minv                        
                    heatmap_data[name] = values

            heatmap_center_value = 0
            heatmap_color_center = "white"
            heatmap_color_up = 0.3
            heatmap_color_down = 0.7
            heatmap_color_missing = "black"
            
            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)
                    
            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)

                    
                    
        # scale the tree
        if not args.height: 
            args.height = None
        if not args.width: 
            args.width = None

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0                
            if len(node.children) == 1:
                node.img_style['size'] = 2                
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            ftype_pos = defaultdict(int)

            for findex, f in enumerate(FACES):
                if (f['nodetype'] == 'any' or 
                    (f['nodetype'] == 'leaf' and node.is_leaf()) or
                    (f['nodetype'] == 'internal' and not node.is_leaf())):


                    # if node passes face filters
                    if node_matcher(node, f["filters"]):
                        if f["value"].startswith("@"):
                            fvalue = getattr(node, f["value"][1:], None)
                        else:
                            fvalue = f["value"]

                        # if node's attribute has content, generate face
                        if fvalue is not None:
                            fsize = f["size"]
                            fbgcolor = f["bgcolor"]
                            fcolor = f['color']

                            if fcolor:
                                # Parse color options
                                auto_m = re.search("auto\(([^)]*)\)", fcolor)
                                if auto_m:
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr :
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr

                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(node, color_keyattr, None)

                                    last_seed = f2last_seed.setdefault(color_keyattr, random.random())

                                    seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed

                                    fcolor = f2color.setdefault(color_bin, random_color(h=seed))

                            if fbgcolor:
                                # Parse color options
                                auto_m = re.search("auto\(([^)]*)\)", fbgcolor)
                                if auto_m:
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr :
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr

                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(node, color_keyattr, None)

                                    last_seed = f2last_seed.setdefault(color_keyattr, random.random())

                                    seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed

                                    fbgcolor = f2color.setdefault(color_bin, random_color(h=seed))

                            if f["ftype"] == "text":
                                if f.get("format", None):
                                    fvalue = f["format"] % fvalue

                                F = TextFace(fvalue,
                                             fsize = fsize,
                                             fgcolor = fcolor or "black",
                                             fstyle = f.get('fstyle', None))

                            elif f["ftype"] == "fullseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="seq",
                                                       seqtail_format="seq",
                                                       height=fsize)
                            elif f["ftype"] == "compactseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="compactseq",
                                                       seqtail_format="compactseq",
                                                       height=fsize)
                            elif f["ftype"] == "blockseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="blockseq",
                                                   seqtail_format="blockseq",
                                                       height=fsize,
                                                       fgcolor=fcolor or "slategrey",
                                                       bgcolor=fbgcolor or "slategrey",
                                                       scale_factor = 1.0)
                                fbgcolor = None
                            elif f["ftype"] == "bubble":
                                try:
                                    v = float(fvalue)
                                except ValueError:
                                    rad = fsize
                                else:
                                    rad = fsize * v
                                F = faces.CircleFace(radius=rad, style="sphere",
                                                     color=fcolor or "steelblue")

                            elif f["ftype"] == "heatmap":
                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                else:
                                    col = f["column"]

                                for i, value in enumerate(heatmap_data.get(node.name, [])):
                                    ftype_pos[f["pos"]] += 1

                                    if value is None:
                                        color = heatmap_color_missing
                                    elif value > heatmap_center_value:
                                        color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_up)
                                    elif value < heatmap_center_value:
                                        color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_down)
                                    else:
                                        color = heatmap_color_center
                                    node.add_face(RectFace(20, 20, color, color), position="aligned", column=col + i)
                                    # Add header 
                                    # for i, name in enumerate(header):
                                    #    nameF = TextFace(name, fsize=7)
                                    #    nameF.rotation = -90
                                    #    tree_style.aligned_header.add_face(nameF, column=i)
                                F = None

                            elif f["ftype"] == "profile":
                                # internal profiles?
                                F = None
                            elif f["ftype"] == "barchart":
                                F = None
                            elif f["ftype"] == "piechart":
                                F = None



                            # Add the Face
                            if F:
                                F.opacity = f['opacity'] or 1.0

                                # Set face general attributes
                                if fbgcolor:
                                    F.background.color = fbgcolor

                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                    ftype_pos[f["pos"]] += 1    
                                else:
                                    col = f["column"]
                                node.add_face(F, column=col, position=f["pos"])

        if args.image:
            t.render("t%d.%s" %(tindex, args.image),
                     tree_style=ts, w=args.width, h=args.height, units=args.size_units)
        else:
            t.show(None, tree_style=ts)
            realname = name2realname.get(name, name)
            score = name2score.get(name, "Exact:1.0")
            print "\t".join(
                map(str,
                    [score, name, realname.capitalize(), taxid]))

    if args.taxid_file:
        all_taxids.extend(
            map(strip,
                open(args.taxid_file, "rU").read().split("\n")))
    if args.taxid:
        all_taxids.extend(args.taxid)

    reftree = None
    if args.reftree:
        reftree = PhyloTree(args.reftree)
        all_taxids.extend(list(set([n.name for n in reftree.iter_leaves()])))

    if all_taxids and args.info:
        log.info("Dumping %d taxid translations:" % len(all_taxids))
        all_taxids = set(all_taxids)
        all_taxids.discard("")
        translator = get_taxid_translator(all_taxids)
        for taxid, name in translator.iteritems():
            lineage = get_sp_lineage(taxid)
            named_lineage = ','.join(translate_to_names(lineage))
            lineage = ','.join(map(str, lineage))
            print "\t".join(map(str, [taxid, name, named_lineage, lineage]))
        for notfound in all_taxids - set(
                str(k) for k in translator.iterkeys()):
            print >> sys.stderr, notfound, "NOT FOUND"
Exemple #22
0
    exit()

# Output directory for the per-tree event files: an "events" sub-directory of
# the directory given as the first command-line argument. It is created only
# when the path does not already exist.
events_dir = '%s/events' % sys.argv[1]
if not os.path.exists(events_dir):
    os.makedirs(events_dir)

# get tree files from directory
tree_files = os.listdir(str(sys.argv[1]))
for tree in tree_files:
    if re.match(r"^\d+\.fna\.aln.+\.tree", tree):
        # get orthogroup id
        ortho = re.sub(r'\D', "", tree)
        # load newick tree
        #print(tree)
        t = PhyloTree(tree)
        #print(t)
        evts = file('%s' '/' '%s' '.temp' % (events_dir, tree), "w")
        ##########################################################################################
        # 			evolutionary events involving all taxa
        ##########################################################################################
        # Alternatively, you can scan the whole tree topology
        events = t.get_descendant_evol_events()
        # print its orthology and paralogy relationships
        for ev in events:
            if ev.etype == "S":
                evts.write(",".join(ev.in_seqs))
                evts.write("<===>")
                evts.write(",".join(ev.out_seqs))
                evts.write("\n")
            elif ev.etype == "D":
Exemple #23
0
#!/usr/bin/python
from __future__ import absolute_import

import sys
from ete2 import PhyloTree

if __name__ == "__main__":
    # Load a tree with PhyloTree and render it to an output file.
    #
    # Command line: <script> TREE ALIGNMENT OUTPUT
    #   TREE      -- value handed to PhyloTree()
    #   ALIGNMENT -- still required positionally, but currently unused because
    #                the link_to_alignment() call below is disabled
    #   OUTPUT    -- value handed to PhyloTree.render()
    if len(sys.argv) < 4:
        # Fail with a readable usage message instead of a bare IndexError.
        sys.exit("usage: %s TREE ALIGNMENT OUTPUT" % sys.argv[0])

    t, s, out = sys.argv[1:4]

    pt = PhyloTree(t)
    # Alignment linking is left disabled, as in the original script:
    #    pt.link_to_alignment(alignment=s)
    pt.render(out)