def build_hmm_from_tree(base, tree_name, aln_name, msa_dir, hmm_dir):
    '''
    Build one MSA file and one HMM for every internal node of a tree.

    The tree is loaded together with its FASTA alignment. Each internal node
    is tagged with an ``hmm`` feature ("node0", "node1", ... in traversal
    order), the sequences of the leaves below it are written to
    ``<msa_dir><node>.aln`` and ``build_hmmer3_hmm_from_alignment.py`` is run
    on that file. Finally every ``*.hmm`` file found under the ``hmm_dir``
    prefix is concatenated into ``<hmm_dir><base>_concat.hmm``.

    ``msa_dir`` and ``hmm_dir`` are used as plain string prefixes, so they
    must already end with a path separator.

    Returns the annotated PhyloTree. Raises whatever ``check_call`` raises
    when the HMM builder exits with a non-zero status.
    '''
    # Annotate internal nodes with name of corresponding HMM.
    pt = PhyloTree(tree_name, alignment=aln_name, alg_format="fasta")
    i_node = 0
    for node in pt.traverse():
        if node.is_leaf():
            continue
        node_name = 'node%s' % i_node
        node.add_features(hmm=node_name)
        i_node += 1

        # make msa for node: FASTA records for all leaves under this node
        msa_lines = []
        for leaf in node.iter_leaves():
            msa_lines.append(">%s" % leaf.name)
            msa_lines.append(str(leaf.sequence))
        msa_path = '%s%s.aln' % (msa_dir, node_name)
        # the context manager closes the handle even if the write fails
        with open(msa_path, 'w') as msa:
            msa.write('\n'.join(msa_lines))

        # build HMM for node
        check_call(['python', 'build_hmmer3_hmm_from_alignment.py',
                    '--name', '%s%s' % (hmm_dir, node_name),
                    msa_path])

    # concatenate HMMs into one file for Hmmscan
    # NOTE(review): the command is built by string interpolation and run
    # through the shell; the output file name also matches the "*.hmm" glob
    # if it is left over from a previous run -- confirm this is acceptable.
    os.system('cat %s*.hmm > %s%s_concat.hmm' % (hmm_dir, hmm_dir, base))
    return pt
def run(args):
    """Print the sequences on each side of every evolutionary event.

    Every entry of ``args.src_tree_iterator`` is consumed; a tree is only
    loaded and scanned when ``args.orthologs`` is not None. For each event
    returned by ``get_descendant_evol_events`` one line is printed holding
    ``in_seqs`` and ``out_seqs`` separated by a single space.
    """
    from ete2 import Tree, PhyloTree

    for newick in args.src_tree_iterator:
        if args.orthologs is None:
            continue
        phylo = PhyloTree(newick)
        for event in phylo.get_descendant_evol_events():
            # same output as the two-item print statement
            print("%s %s" % (event.in_seqs, event.out_seqs))
def load_tree_sequences(nwk_file, fasta_file):
    '''
    Return a PhyloTree built from `nwk_file` (newick format 1) whose leaves
    are linked to the sequences of the FASTA alignment `fasta_file`.
    '''
    linked_tree = PhyloTree(newick=nwk_file, format=1)
    linked_tree.link_to_alignment(alignment=fasta_file, alg_format='fasta')
    return linked_tree
def get_example_tree():
    # Reconciles a small example gene tree with its species tree, links the
    # reconciled tree to an alignment and returns it with a default style.
    species_tree_nw = "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);"
    gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'

    gene_phylogeny = PhyloTree(gene_tree_nw)
    species_phylogeny = PhyloTree(species_tree_nw)

    # the list of reconciliation events is returned too but not used here
    reconciled, _events = gene_phylogeny.reconcile(species_phylogeny)

    # NOTE(review): `alg` is not defined inside this function; it must come
    # from the enclosing module -- confirm.
    reconciled.link_to_alignment(alg)

    style = TreeStyle()
    return reconciled, style
def my_tree_loader(tree):
    """ Tree loader used by the WebTreeApplication object.

    Builds a PhyloTree without a species naming function and, when the
    first leaf reports its species as "Unknown", installs
    `extract_species_code` as the naming function before returning it.
    """
    loaded = PhyloTree(tree, sp_naming_function=None)

    # Probe a single leaf to see if species information is included
    probe_leaf = loaded.get_leaves()[0]
    if probe_leaf.species == "Unknown":
        loaded.set_species_naming_function(extract_species_code)

    return loaded
def load_tree(f_tree, f_align):
    # Returns (tree, tree style). `f_tree` and `f_align` are read through
    # their .read() method; `f_align` may be None, in which case no
    # alignment is linked.

    # Tree style: leaf names, branch lengths and supports all visible
    style = TreeStyle()
    style.show_leaf_name = True
    style.show_branch_length = True
    style.show_branch_support = True
    style.branch_vertical_margin = 10

    # Load phylogenetic tree
    phylo = PhyloTree(f_tree.read())
    if f_align is None:
        return phylo, style

    phylo.link_to_alignment(f_align.read())
    return phylo, style
def integrate_pwids_into_tree(tree, alignment):
    '''Load `tree` with its FASTA `alignment` and annotate every node.

    Each node receives `node_kerf_name` ("node000", "node001", ... in
    traversal order), `kerf_pass` and `sh_pass` flags set to False, and
    `min_pwid`: 1.0 for a leaf, otherwise the value returned by
    get_min_pwid_of_leaves for the leaves below the node. Returns the tree.'''
    pt = PhyloTree(tree, alignment=alignment, alg_format="fasta")
    for position, current in enumerate(pt.traverse()):
        # Names and flags consumed later by the kerf and sh functions
        current.node_kerf_name = 'node%s' % str(position).zfill(3)
        current.kerf_pass = False
        current.sh_pass = False
        current.min_pwid = (
            1.0 if current.is_leaf()
            else get_min_pwid_of_leaves(current.get_leaves())
        )
    return pt
def main():
    """Print how many leaves each species contributes to a tree.

    Reads the first line of the file named by ``sys.argv[1]`` as a newick
    string, derives each leaf's species with ``_get_spcode`` and prints one
    ``<species>\\t<count>`` line per species, most frequent first.
    """
    fn = sys.argv[1]
    # close the input file deterministically instead of leaking the handle
    with open(fn) as handle:
        nw = handle.readline()

    t = PhyloTree(nw)
    # set species naming function
    t.set_species_naming_function(_get_spcode)

    # plain dict counting; no bare except needed to create missing keys
    species = {}
    for leaf in t.get_leaves():
        species[leaf.species] = species.get(leaf.species, 0) + 1

    for spCode in sorted(species, key=lambda x: species[x], reverse=True):
        print('%s\t%s' % (spCode, species[spCode]))
def test_01tree_annotation(self):
    """Check NCBI taxonomy annotation of a three-taxid tree.

    Verifies the scientific name of the root and the name, taxid, rank and
    both lineage representations of the parent of leaf 9606 and of leaf
    9606 itself. The expected names for the leaf are 'Homo' and
    'Homo sapiens'; the source had them replaced by a censored token.
    """
    # Lineage shared by both checked nodes, from the root down to Homininae.
    homininae_names = [
        u'root', u'cellular organisms', u'Eukaryota', u'Opisthokonta',
        u'Metazoa', u'Eumetazoa', u'Bilateria', u'Deuterostomia',
        u'Chordata', u'Craniata', u'Vertebrata', u'Gnathostomata',
        u'Teleostomi', u'Euteleostomi', u'Sarcopterygii',
        u'Dipnotetrapodomorpha', u'Tetrapoda', u'Amniota', u'Mammalia',
        u'Theria', u'Eutheria', u'Boreoeutheria', u'Euarchontoglires',
        u'Primates', u'Haplorrhini', u'Simiiformes', u'Catarrhini',
        u'Hominoidea', u'Hominidae', u'Homininae',
    ]
    homininae_ids = [
        1, 131567, 2759, 33154, 33208, 6072, 33213, 33511, 7711, 89593,
        7742, 7776, 117570, 117571, 8287, 1338369, 32523, 32524, 40674,
        32525, 9347, 1437010, 314146, 9443, 376913, 314293, 9526, 314295,
        9604, 207598,
    ]

    t = PhyloTree("((9598, 9606), 10090);",
                  sp_naming_function=lambda name: name)
    t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
    self.assertEqual(t.sci_name, 'Euarchontoglires')

    homi = (t&'9606').up
    self.assertEqual(homi.sci_name, 'Homininae')
    self.assertEqual(homi.taxid, 207598)
    self.assertEqual(homi.rank, 'subfamily')
    self.assertEqual(homi.named_lineage, homininae_names)
    self.assertEqual(homi.lineage, homininae_ids)

    human = t&'9606'
    self.assertEqual(human.sci_name, 'Homo sapiens')
    self.assertEqual(human.taxid, 9606)
    self.assertEqual(human.rank, 'species')
    self.assertEqual(human.named_lineage,
                     homininae_names + [u'Homo', u'Homo sapiens'])
    self.assertEqual(human.lineage, homininae_ids + [9605, 9606])
def main():
    """Report the number of leaves per species code for one tree.

    The newick string is the first line of the file given as the first
    command-line argument. Species codes come from ``_get_spcode``. Output
    is one tab-separated ``code<TAB>count`` line per species, sorted by
    decreasing count.
    """
    fn = sys.argv[1]
    # `with` guarantees the file is closed once the newick line is read
    with open(fn) as newick_file:
        nw = newick_file.readline()

    t = PhyloTree(nw)
    # set species naming function
    t.set_species_naming_function(_get_spcode)

    species = {}
    for l in t.get_leaves():
        spCode = l.species
        # dict.get replaces the bare try/except used only for missing keys
        species[spCode] = species.get(spCode, 0) + 1

    for spCode in sorted(species, key=lambda x: species[x], reverse=True):
        print('%s\t%s' % (spCode, species[spCode]))
def phylomedump_tree_iterator(tarfn, verbose=0):
    """PhylomeDB all_trees.tar.gz dump treeobj generator.

    Walks the members of the tar archive `tarfn` (gzip-compressed when the
    name ends with ".gz"). A ``.nw`` member starts a new PhyloTree whose
    ``seedid`` and ``method`` are taken from the first two dot-separated
    parts of the member's base name. A ``.lk`` member supplies seedid,
    method and likelihood for the most recently loaded tree; the tree is
    yielded only if the likelihood is non-zero and seedid/method match.

    With `verbose` set, progress and a final summary go to stderr.
    The archive is closed when the generator finishes or is closed.
    """
    # open tarfile
    mode = "r:gz" if tarfn.endswith(".gz") else "r"
    tar = tarfile.open(tarfn, mode)
    i = k = 0
    t = None  # most recently loaded tree; None until the first .nw entry
    try:
        # process entries
        for m in tar:
            if not m.isfile():
                continue
            # load tree
            if m.name.endswith(".nw"):
                i += 1
                # get nw
                nw = tar.extractfile(m).readline()
                t = PhyloTree(nw)
                # add seedid and method info
                # Phy000CWA9_YEAST.JTT.nw --> Phy000CWA9_YEAST JTT
                seedid, method = os.path.basename(m.name).split(".")[:2]
                t.seedid = seedid
                t.method = method
            # or add lk to treeobj and validate seedid / method
            elif m.name.endswith(".lk"):
                seedid, method, lk = tar.extractfile(m).readline().split('\t')[:3]
                if t is None:
                    # a likelihood entry with no tree before it cannot be used
                    sys.stderr.write(
                        " Err: Likelihood entry without preceding tree: %s\n" % m.name)
                    continue
                t.lk = float(lk)
                if not t.lk:
                    sys.stderr.write(
                        " Err: Zero likelihood (%s) for: %s\n" % (t.lk, ", ".join((t.seedid, t.method))))
                    continue
                if seedid != t.seedid or t.method != method:
                    sys.stderr.write(
                        " Err: Seedid and/or method doesn't match: %s\n" % ", ".join((seedid, t.seedid, method, t.method)))
                    continue
                k += 1
                if verbose and not i % 100:
                    sys.stderr.write(" %6i\r" % i)
                yield t
    finally:
        # release the archive even if the consumer stops iterating early
        tar.close()

    if verbose:
        sys.stderr.write(
            " %s out of %s trees succesfully parsed [memory: %s KB]\n" % (k, i, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
def get_tree(self, protid, method, phylome_id):
    """ Fetch the tree built with `method` for `protid` in a phylome.

    The first three characters of `protid` are used as the species value
    and the remainder as the protid value of the query. Returns
    (PhyloTree, likelihood) for the first row found, or (None, None) when
    the query's execute() call returns a false value. """
    species_code = protid[:3]
    protein_number = protid[3:]
    query_args = (self._trees_table, phylome_id,
                  species_code, protein_number, method)
    cmd = 'SELECT newick,lk FROM %s WHERE phylome_id=%s AND species="%s" AND protid="%s" AND method ="%s"' % query_args

    if not self._SQL.execute(cmd):
        return None, None

    row = self._SQL.fetchone()
    newick = row[0]
    likelihood = float(row[1])
    return PhyloTree(newick), likelihood
def build_hmm_from_tree(tree_name, aln_name, msa_dir, hmm_dir):
    '''
    Reads tree and corresponding msa and creates an MSA & HMM for each
    internal node.

    Internal nodes are tagged with an ``hmm`` feature ("node0", "node1", ...
    in traversal order). For each of them the sequences of the leaves below
    are written as FASTA to ``<msa_dir><node>.aln`` and
    ``build_hmmer3_hmm_from_alignment.py`` is invoked on that file. All
    ``*.hmm`` files under the ``hmm_dir`` prefix are then concatenated into
    ``<hmm_dir>concat.hmm``.

    ``msa_dir`` and ``hmm_dir`` are plain string prefixes and must already
    end with a path separator. Returns the annotated PhyloTree; a failing
    HMM build propagates the exception raised by ``check_call``.
    '''
    # Annotate internal nodes with name of corresponding HMM.
    pt = PhyloTree(tree_name, alignment=aln_name, alg_format="fasta")
    i_node = 0
    for node in pt.traverse():
        if node.is_leaf():
            continue
        node_name = 'node%s' % i_node
        node.add_features(hmm=node_name)
        i_node += 1

        # make msa for node
        records = []
        for leaf in node.iter_leaves():
            records.append(">%s" % leaf.name)
            records.append(str(leaf.sequence))
        aln_path = '%s%s.aln' % (msa_dir, node_name)
        # `with` closes the file even when writing raises
        with open(aln_path, 'w') as msa:
            msa.write('\n'.join(records))

        # build HMM for node
        check_call([
            'build_hmmer3_hmm_from_alignment.py', '--name',
            '%s%s' % (hmm_dir, node_name),
            aln_path,
        ])

    # concatenate HMMs into one file for Hmmscan
    # NOTE(review): shell command built by interpolation; a concat.hmm left
    # over from an earlier run also matches the "*.hmm" glob -- confirm.
    os.system('cat %s*.hmm > %sconcat.hmm' % (hmm_dir, hmm_dir))
    return pt
def get_topology(taxids, intermediate_nodes=False, rank_limit=None):
    """Build a tree connecting the lineages of `taxids`.

    One node is created per lineage element (named after the element and
    carrying a "rank" feature). Unless `intermediate_nodes` is true,
    descendants with exactly one child whose id is not in `taxids` are
    removed. When `rank_limit` is given, linking along a lineage stops at
    the first node of that rank. Returns the root, or its only child
    (detached) when the root has a single child.
    """
    from ete2 import PhyloTree

    lineage_tracks = {}
    nodes_by_id = {}
    for taxid in taxids:
        path = deque()
        taxid_lineage = get_sp_lineage(taxid)
        rank_of = get_ranks(taxid_lineage)
        for step in taxid_lineage:
            step_node = nodes_by_id.setdefault(step, PhyloTree())
            step_node.name = str(step)
            step_node.add_feature("rank", str(rank_of.get(int(step), "?")))
            path.append(step_node)
        lineage_tracks[taxid] = path

    # generate parent child relationships
    for path in lineage_tracks.values():
        previous = None
        for current in path:
            if previous and current not in previous.children:
                previous.add_child(current)
            if rank_limit and current.rank == rank_limit:
                break
            previous = current

    top = nodes_by_id[1]

    # (A disabled variant that duplicated requested internal taxids as
    # extra leaves used to live here; it is intentionally not executed.)

    # remove onechild-nodes
    if not intermediate_nodes:
        for candidate in top.get_descendants():
            is_single_child = len(candidate.children) == 1
            if is_single_child and int(candidate.name) not in taxids:
                candidate.delete(prevent_nondicotomic=False)

    if len(top.children) != 1:
        return top
    return top.children[0].detach()
def get_best_tree(self, protid, phylome_id):
    """ Return (winner model, {model: likelihood}, winner tree).

    Only rows whose likelihood is below zero are considered; the winner is
    the row with the highest such likelihood. When no row qualifies the
    result is (None, {}, None). """
    command ='SELECT newick,method,lk FROM %s WHERE phylome_id=%s AND species="%s" and protid="%s";' \
        % (self._trees_table, phylome_id, protid[:3], protid[3:])
    self._SQL.execute(command)

    likelihoods = {}
    best_model = best_lk = best_newick = None
    for newick, model, lk in self._SQL.fetchall():
        if not lk < 0:
            continue
        likelihoods[model] = lk
        if best_lk is None or lk > best_lk:
            best_lk, best_model, best_newick = lk, model, newick

    best_tree = PhyloTree(best_newick) if best_newick else None
    return best_model, likelihoods, best_tree
def test_01tree_annotation(self):
    """Check NCBI taxonomy annotation of the tree ((9598, 9606), 10090).

    Asserts the root's scientific name, then name, taxid, rank, named
    lineage and numeric lineage for the parent of leaf 9606 and for the
    leaf itself. The leaf's expected names are "Homo" / "Homo sapiens";
    the source carried a censored token in their place.
    """
    # Root-to-Homininae lineage, shared by both nodes under test.
    shared_named_lineage = [
        u"root",
        u"cellular organisms",
        u"Eukaryota",
        u"Opisthokonta",
        u"Metazoa",
        u"Eumetazoa",
        u"Bilateria",
        u"Deuterostomia",
        u"Chordata",
        u"Craniata",
        u"Vertebrata",
        u"Gnathostomata",
        u"Teleostomi",
        u"Euteleostomi",
        u"Sarcopterygii",
        u"Dipnotetrapodomorpha",
        u"Tetrapoda",
        u"Amniota",
        u"Mammalia",
        u"Theria",
        u"Eutheria",
        u"Boreoeutheria",
        u"Euarchontoglires",
        u"Primates",
        u"Haplorrhini",
        u"Simiiformes",
        u"Catarrhini",
        u"Hominoidea",
        u"Hominidae",
        u"Homininae",
    ]
    shared_lineage = [
        1,
        131567,
        2759,
        33154,
        33208,
        6072,
        33213,
        33511,
        7711,
        89593,
        7742,
        7776,
        117570,
        117571,
        8287,
        1338369,
        32523,
        32524,
        40674,
        32525,
        9347,
        1437010,
        314146,
        9443,
        376913,
        314293,
        9526,
        314295,
        9604,
        207598,
    ]

    t = PhyloTree("((9598, 9606), 10090);", sp_naming_function=lambda name: name)
    t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
    self.assertEqual(t.sci_name, "Euarchontoglires")

    homi = (t & "9606").up
    self.assertEqual(homi.sci_name, "Homininae")
    self.assertEqual(homi.taxid, 207598)
    self.assertEqual(homi.rank, "subfamily")
    self.assertEqual(homi.named_lineage, shared_named_lineage)
    self.assertEqual(homi.lineage, shared_lineage)

    human = t & "9606"
    self.assertEqual(human.sci_name, "Homo sapiens")
    self.assertEqual(human.taxid, 9606)
    self.assertEqual(human.rank, "species")
    self.assertEqual(
        human.named_lineage,
        shared_named_lineage + [u"Homo", u"Homo sapiens"],
    )
    self.assertEqual(human.lineage, shared_lineage + [9605, 9606])
parser.add_argument("--newick", dest="newick", action="store_true", default="", help=("print the extended newick format for provided tree using" " ASCII representation and all its evolutionary events" " before orthoXML export")) args = parser.parse_args() newick = args.tree[0] SPECIES_NAME_POS = args.species_field SPECIES_NAME_DELIMITER = args.species_delimiter # load a phylomeDB Tree provided as a newick file in the command line t = PhyloTree(newick, sp_naming_function=extract_spname) if args.root: if len(args.root) > 1: outgroup = t.get_common_ancestor(args.root) else: outgroup = t & args.root[0] t.set_outgroup(outgroup) if not args.skip_ortholog_detection: # detect speciation and duplication events using the species overlap # algorithm used in phylomeDB t.get_descendant_evol_events() if args.ascii:
pie = PieChartFace([changes[node.name][0], changes[node.name][1]], changes[node.name][2], changes[node.name][2], ["Green", "Red"]) pie.opacity = 0.5 #faces.add_face_to_node(name_face, node, column=0, position="branch-right") faces.add_face_to_node(pie, node, column=0, position="float") ts = TreeStyle() # Do not add leaf names automatically ts.show_leaf_name = False # Use my custom layout ts.show_leaf_name = False ts.layout_fn = my_layout t = PhyloTree( '/Volumes/MP_HD/Pm_Ts_Tf_Pf_comparison/4_spec_gene_gain_loss/eurot_gene_gain_loss/node_assignments_tree_nos_only_dbl.nwk', format=1) dataorder = [ 'A. fumigatus', 'N. fisheri', 'A. clavatus', 'A. terreus', 'A. flavus', 'A. oryzae', 'A. niger', 'A. nidulans', 'P. decumbens', 'P. roquefortii', 'P. chrysogenum', 'P. digitatum', 'T. stipitatus', 'P. funiculosum', 'T. marneffei', 'T. flavus', 'A. dermatiditis', 'H. capsulatum', 'P. brasiliensis', 'C. immitis', 'U. reesei', 'T. equinum', 'T. tonsurans' ] nos = [ "1", '2', '4', '6', '7', '8', '12', '14', '16', '17', '18', '19', '24', '25', '26', '27', '32', '33', '35', '37', '38', '40', '41' ] branch_to_node = {
def get_topology(self, taxids, intermediate_nodes=False, rank_limit=None, collapse_subspecies=False):
    """Given a list of taxid numbers, return the minimal pruned NCBI taxonomy
    tree containing all of them.

    :param False intermediate_nodes: If True, single child nodes
        representing the complete lineage of leaf nodes are kept. Otherwise,
        the tree is pruned to contain the first common ancestor of each
        group.

    :param None rank_limit: If valid NCBI rank name is provided, the tree is
        pruned at that given level. For instance, use rank="species" to get
        rid of sub-species or strain leaf nodes.

    :param False collapse_subspecies: If True, every requested node of rank
        "species" that still has descendants gets all of them re-attached
        directly below it (renamed with a "{<rank>}" suffix) together with
        a copy of itself named "<name>{species}", and is flagged with the
        feature ``collapse_subspecies="1"``.
    """
    from ete2 import PhyloTree
    sp2track = {}
    elem2node = {}
    for sp in taxids:
        track = deque()
        lineage = self.get_sp_lineage(sp)
        id2rank = self.get_ranks(lineage)

        for elem in lineage:
            node = elem2node.setdefault(elem, PhyloTree())
            node.name = str(elem)
            node.add_feature("rank", str(id2rank.get(int(elem), "?")))
            track.append(node)
        sp2track[sp] = track

    # generate parent child relationships
    for track in sp2track.values():
        parent = None
        for elem in track:
            if parent and elem not in parent.children:
                parent.add_child(elem)
            if rank_limit and elem.rank == rank_limit:
                break
            parent = elem
    root = elem2node[1]

    # This fixes cases in which requested taxids are internal nodes
    #for x in set(sp2track) - set([n.name for n in root.iter_leaves()]):
    #    new_leaf = sp2track[x][-1].copy()
    #    for ch in new_leaf.get_children():
    #        ch.detach()
    #    sp2track[x][-1].add_child(new_leaf)

    #remove onechild-nodes
    if not intermediate_nodes:
        for n in root.get_descendants():
            if len(n.children) == 1 and int(n.name) not in taxids:
                n.delete(prevent_nondicotomic=False)

    if collapse_subspecies:
        # The original referenced undefined names (`t`, `all_taxids`) and a
        # `taxid` attribute that is never set here; the tree built above is
        # `root`, the requested ids are `taxids`, and each node's id is
        # stored in its `name`.
        species_nodes = [n for n in root.traverse()
                         if n.rank == "species" and int(n.name) in taxids]
        for sp_node in species_nodes:
            bellow = sp_node.get_descendants()
            if bellow:
                # creates a copy of the species node
                connector = sp_node.__class__()
                for f in sp_node.features:
                    connector.add_feature(f, getattr(sp_node, f))
                connector.name = connector.name + "{species}"
                for sub_node in bellow:
                    sub_node.detach()
                    sub_node.name = sub_node.name + "{%s}" % sub_node.rank
                    sp_node.add_child(sub_node)
                sp_node.add_child(connector)
                sp_node.add_feature("collapse_subspecies", "1")

    if len(root.children) == 1:
        return root.children[0].detach()
    else:
        return root
def run(args):
    """Annotate each source tree and dump it with the annotated features.

    For every newick in ``args.src_tree_iterator``:

    * with ``args.ncbi`` the tree is loaded as a PhyloTree and annotated
      through ``annotate_ncbi_taxa(args.taxid_attr)``; otherwise it is
      loaded as a plain Tree;
    * every entry of ``args.feature`` is a list of ``key:value`` fields
      (``name``, ``source``, ``multiple``, ``type``). The source file holds
      tab-separated ``nodenames<TAB>value`` lines, where ``nodenames`` is a
      comma-separated list; several names select their common ancestor.
      Blank lines and lines starting with '#' are ignored;
    * the tree is passed to ``dump`` with the accumulated feature names.

    Raises ValueError for malformed or unknown options, an unknown type,
    or a feature lacking ``name`` or ``source``.
    """
    from ete2 import Tree, PhyloTree

    type2cast = {"str": str, "int": int, "float": float, "set": set, "list": list}

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update(["taxid", "name", "rank", "bgcolor", "sci_name",
                             "collapse_subspecies", "named_lineage", "lineage"])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            # `amultiple` is parsed but not used further in this function
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    key, value = map(strip, field.split(":"))
                except Exception:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # append
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are mandatory. The original built the exception
            # without raising it and only tested for both being missing.
            if not aname or not asource:
                raise ValueError('name and source are required when annotating a new feature [%s]'
                                 % annotation)

            features.add(aname)
            # `with` closes the annotation file once it has been consumed
            with open(asource, 'rU') as source:
                for line in source:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = map(strip, line.split('\t'))
                    nodenames = map(strip, nodenames.split(','))

                    # a leading '!' on the first name disables relaxed grouping
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass
                            # do something
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): an existing attribute is only reported,
                    # not replaced, despite the wording of the message.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' % nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
from ete2 import PhyloTree # Loads an example tree nw = """ ((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)), (Ptr_002,(Hsa_002,Mmu_002)))); """ t = PhyloTree(nw) print t # /-Dme_001 # /--------| # | \-Dme_002 # | # | /-Cfa_001 # | /--------| #---------| | \-Mms_001 # | /--------| # | | | /-Hsa_001 # | | | /--------| # | | \--------| \-Ptr_001 # \--------| | # | \-Mmu_001 # | # | /-Ptr_002 # \--------| # | /-Hsa_002 # \--------| # \-Mmu_002 # # To obtain all the evolutionary events involving a given leaf node we # use get_my_evol_events method matches = t.search_nodes(name="Hsa_001")
["Green", "Red"]) pie.opacity = 0.5 #faces.add_face_to_node(name_face, node, column=0, position="branch-right") faces.add_face_to_node(pie, node, column=0, position="float") ts = TreeStyle() # Do not add leaf names automatically ts.show_leaf_name = False # Use my custom layout ts.show_leaf_name = False ts.layout_fn = my_layout #t = PhyloTree('/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree_nos.nwk', format=1) t = PhyloTree( '/Volumes/MP_HD/CI_GENOME_SEQ/CI_orthomcl_data/gain_loss_tree_frm_orthogroups/CI_badirate_branch_no_tree_no_names.nwk', format=1) #dataorder = ['FRR2161','FRR3841','FRR4059','FRR3840','F4','BR2SD2','BR2','BR2SD1','G09043','G11702','G11203SD4','G11203SD3','G11203','G11203SD1','G09027SD2','G09027SD1','G09027','FRR3871','FRR3482','HR2','G11012'] #nos = ["1",'2','4','5','7','10','11','13','15','17','19','20','22','24','26','27','29','32','33','35','37'] #dataorder = ['FRR2161','FRR3841','FRR3840','FRR4059','F4','BR2SD2','BR2','BR2SD1','G09043','G11702','G11203SD4','G11203SD3','G11203','G11203SD1','G09027SD2','G09027SD1','G09027','FRR3871','FRR3482','HR2','G11012'] #nos = ["1",'2','4','5','7','10','11','13','15','17','19','20','22','24','26','27','29','32','33','35','37'] #branch_to_node = {24:25,39:38,41:43,33:35,5:5,31:0,18:22,14:16,40:42,42:41,35:36,36:32,27:30,15:2,26:29,12:15,29:26,21:19,11:4,32:34,6:11,17:20,22:17,16:18,13:3,34:33,43:37,3:6,7:13,8:14,37:39,10:10,44:31,30:24,20:21,2:8,1:7,38:40,28:28,4:9,25:27,19:23,23:1,9:12} #print branch_to_node inchanges = open( '/Volumes/MP_HD/CI_GENOME_SEQ/CI_orthomcl_data/gain_loss_tree_frm_orthogroups/CI_denovo_gene_gain_loss_table.txt', 'r')
def run(args):
    """Display or render every source tree according to the parsed options.

    In text mode (``args.text_mode``) each tree is printed as ASCII and the
    function returns. Otherwise a TreeStyle is configured from `args`, the
    module-level ``FACES`` list is rebuilt from ``args.face`` plus the
    faces implied by ``as_ncbi``, ``alg``, ``heatmap`` and ``bubbles``, and
    each tree is decorated node by node and then rendered to
    ``t<index>.<args.image>`` or shown interactively.

    With ``args.raxml`` the tree file is read and RAxML-style
    ``:<length>[<support>]`` labels are rewritten into NHX support tags
    before parsing.
    """
    # Imported before the text-mode branch: an `import re` further down
    # makes `re` a local name for the whole function, so using it earlier
    # raised UnboundLocalError.
    import random
    import re
    import colorsys
    from collections import defaultdict

    if args.text_mode:
        from ete2 import Tree
        for tindex, tfile in enumerate(args.src_tree_iterator):
            if args.raxml:
                nw = re.sub(r":(\d+\.\d+)\[(\d+)\]", r":\1[&&NHX:support=\2]", open(tfile).read())
                t = Tree(nw)
            else:
                t = Tree(tfile)

            print(t.get_ascii(show_internal=args.show_internal_names,
                              attributes=args.show_attributes))
        return

    from ete2 import (Tree, PhyloTree, TextFace, RectFace, faces, TreeStyle,
                      add_face_to_node, random_color)

    global FACES

    if args.face:
        FACES = parse_faces(args.face)
    else:
        FACES = []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    # an explicit @name face replaces the automatic leaf names
    for f in FACES:
        if f["value"] == "@name":
            ts.show_leaf_name = False
            break

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(parse_faces(
            ['value:@sci_name, size:10, fstyle:italic',
             'value:@taxid, color:grey, size:6, format:" - %s"',
             'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
             'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
             ]))

    if args.alg:
        FACES.extend(parse_faces(
            ['value:@sequence, size:10, pos:aligned, ftype:%s' % args.alg_type]
        ))

    if args.heatmap:
        FACES.extend(parse_faces(
            ['value:@name, size:10, pos:aligned, ftype:heatmap']
        ))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(parse_faces(
                ['value:@%s, pos:float, ftype:bubble, opacity:0.4' % bubble,
                 ]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    # faces are attached directly to nodes below, so the layout is a no-op
    ts.layout_fn = lambda x: None

    for tindex, tfile in enumerate(args.src_tree_iterator):
        if args.raxml:
            nw = re.sub(r":(\d+\.\d+)\[(\d+)\]", r":\1[&&NHX:support=\2]", open(tfile).read())
            t = PhyloTree(nw)
        else:
            t = PhyloTree(tfile)

        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        if args.heatmap:
            DEFAULT_COLOR_SATURATION = 0.3
            BASE_LIGHTNESS = 0.7

            def gradient_color(value, max_value, saturation=0.5, hue=0.1):
                def rgb2hex(rgb):
                    return '#%02x%02x%02x' % rgb

                def hls2hex(h, l, s):
                    return rgb2hex(tuple(map(lambda x: int(x*255),
                                             colorsys.hls_to_rgb(h, l, s))))

                lightness = 1 - (value * BASE_LIGHTNESS) / max_value
                return hls2hex(hue, lightness, DEFAULT_COLOR_SATURATION)

            # tab-separated table: first column is the node name, the rest
            # are numeric values (empty cells become None)
            heatmap_data = {}
            max_value, min_value = None, None
            for line in open(args.heatmap):
                if line.startswith('#COLNAMES'):
                    pass
                elif line.startswith('#') or not line.strip():
                    pass
                else:
                    fields = line.split('\t')
                    name = fields[0].strip()

                    values = map(lambda x: float(x) if x else None, fields[1:])

                    maxv = max(values)
                    minv = min(values)
                    if max_value is None or maxv > max_value:
                        max_value = maxv
                    if min_value is None or minv < min_value:
                        min_value = minv
                    heatmap_data[name] = values

            heatmap_center_value = 0
            heatmap_color_center = "white"
            heatmap_color_up = 0.3
            heatmap_color_down = 0.7
            heatmap_color_missing = "black"

            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)

            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)

        # scale the tree
        if not args.height:
            args.height = None
        if not args.width:
            args.width = None

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0
            # highlight single-child nodes with a small square
            if len(node.children) == 1:
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            # next free column per face position for this node
            ftype_pos = defaultdict(int)

            for findex, f in enumerate(FACES):
                if (f['nodetype'] == 'any' or
                    (f['nodetype'] == 'leaf' and node.is_leaf()) or
                    (f['nodetype'] == 'internal' and not node.is_leaf())):

                    # if node passes face filters
                    if node_matcher(node, f["filters"]):
                        if f["value"].startswith("@"):
                            fvalue = getattr(node, f["value"][1:], None)
                        else:
                            fvalue = f["value"]

                        # if node's attribute has content, generate face
                        if fvalue is not None:
                            fsize = f["size"]
                            fbgcolor = f["bgcolor"]
                            fcolor = f['color']

                            if fcolor:
                                # Parse color options: "auto(attr)" picks one
                                # random color per distinct attribute value
                                auto_m = re.search(r"auto\(([^)]*)\)", fcolor)
                                if auto_m:
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr:
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr

                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(node, color_keyattr, None)

                                    last_seed = f2last_seed.setdefault(color_keyattr, random.random())

                                    seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed

                                    fcolor = f2color.setdefault(color_bin, random_color(h=seed))

                            if fbgcolor:
                                # Parse color options
                                auto_m = re.search(r"auto\(([^)]*)\)", fbgcolor)
                                if auto_m:
                                    target_attr = auto_m.groups()[0].strip()
                                    if not target_attr:
                                        color_keyattr = f["value"]
                                    else:
                                        color_keyattr = target_attr

                                    color_keyattr = color_keyattr.lstrip('@')
                                    color_bin = getattr(node, color_keyattr, None)

                                    last_seed = f2last_seed.setdefault(color_keyattr, random.random())

                                    seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
                                    f2last_seed[color_keyattr] = seed

                                    fbgcolor = f2color.setdefault(color_bin, random_color(h=seed))

                            # Start from "no face" so an unrecognised ftype
                            # neither raises NameError nor reuses the face
                            # built for a previous node.
                            F = None

                            if f["ftype"] == "text":
                                if f.get("format", None):
                                    fvalue = f["format"] % fvalue

                                F = TextFace(fvalue,
                                             fsize=fsize,
                                             fgcolor=fcolor or "black",
                                             fstyle=f.get('fstyle', None))

                            elif f["ftype"] == "fullseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="seq",
                                                       seqtail_format="seq",
                                                       height=fsize)
                            elif f["ftype"] == "compactseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="compactseq",
                                                       seqtail_format="compactseq",
                                                       height=fsize)
                            elif f["ftype"] == "blockseq":
                                F = faces.SeqMotifFace(seq=fvalue, seq_format="blockseq",
                                                       seqtail_format="blockseq",
                                                       height=fsize,
                                                       fgcolor=fcolor or "slategrey",
                                                       bgcolor=fbgcolor or "slategrey",
                                                       scale_factor=1.0)
                                fbgcolor = None
                            elif f["ftype"] == "bubble":
                                try:
                                    v = float(fvalue)
                                except ValueError:
                                    rad = fsize
                                else:
                                    rad = fsize * v
                                F = faces.CircleFace(radius=rad, style="sphere",
                                                     color=fcolor or "steelblue")

                            elif f["ftype"] == "heatmap":
                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                else:
                                    col = f["column"]

                                # one colored square per heatmap value
                                for i, value in enumerate(heatmap_data.get(node.name, [])):
                                    ftype_pos[f["pos"]] += 1

                                    if value is None:
                                        color = heatmap_color_missing
                                    elif value > heatmap_center_value:
                                        color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_up)
                                    elif value < heatmap_center_value:
                                        color = gradient_color(abs(heatmap_center_value - value), heatmap_max_value, hue=heatmap_color_down)
                                    else:
                                        color = heatmap_color_center
                                    node.add_face(RectFace(20, 20, color, color), position="aligned", column=col + i)
                                    # Add header
                                    # for i, name in enumerate(header):
                                    #    nameF = TextFace(name, fsize=7)
                                    #    nameF.rotation = -90
                                    #    tree_style.aligned_header.add_face(nameF, column=i)
                                F = None

                            elif f["ftype"] == "profile":
                                # internal profiles?
                                F = None
                            elif f["ftype"] == "barchart":
                                F = None
                            elif f["ftype"] == "piechart":
                                F = None

                            # Add the Face
                            if F:
                                F.opacity = f['opacity'] or 1.0

                                # Set face general attributes
                                if fbgcolor:
                                    F.background.color = fbgcolor

                                if not f['column']:
                                    col = ftype_pos[f["pos"]]
                                    ftype_pos[f["pos"]] += 1
                                else:
                                    col = f["column"]
                                node.add_face(F, column=col, position=f["pos"])

        if args.image:
            t.render("t%d.%s" % (tindex, args.image),
                     tree_style=ts, w=args.width, h=args.height, units=args.size_units)
        else:
            t.show(None, tree_style=ts)
log=open(log_file, "w") except: log=open("gephcort_run.log", "w") # Logging start time log.write("Start time: "+str(time.localtime()[0])+"-"+str(time.localtime()[2])+"-"+str(time.localtime()[1])+"\t"+str(time.localtime()[3])+":"+str(time.localtime()[4])+":"+str(time.localtime()[5])+"\n") ape_objects={"delta.plot":"delta_plot", "dist.dna":"dist_dna", "dist.nodes":"dist_nodes", "node.depth":"node_depth", "node.depth.edgelength":"node_depth_edgelength","node.height":"node_height", "node.height.clado":"node_height_clado", "prop.part":"prop_part"} ape=importr("ape", robject_translations = ape_objects) # Required for phangorn ph=importr("phangorn") # Phylogenetic operations in R print "All modules imported successfully" t = PhyloTree(intree, alignment=seq, alg_format=seq_format) # Main tree containing entire sequence dtp = PhyloTree(intree) # Dummy tree for phenotype shuffling print "Tree file read successfully" phenfile=open(phen, "r") # Phenotype file phenlist=[] for line in phenfile.readlines(): phenlist.append([line.split("\t")[0].strip(), line.split("\t")[1].strip()]) phenfile.close() phenotype={} # Dictionary containing species names and their phenotype values # Phenotype file should have two columns separated by tab containing taxa name # in the first column and a numerical phenotype value in the second #
if re.match(r"^OrAeBC5", gene): para_list += 1 if para_list > 0: return (True) else: return (False) # get tree files from directory tree_files = os.listdir(str(sys.argv[1])) for tree in tree_files: if tree.startswith('RAxML_bipartitions.'): # get orthogroup id ortho = re.sub(r'\D', "", tree) # load newick tree #print(tree) t = PhyloTree(tree) #print(t) evts = file('%s' '/' '%s' '.temp' % (events_dir, tree), "w") ########################################################################################## # evolutionary events involving all taxa ########################################################################################## # Alternatively, you can scan the whole tree topology events = t.get_descendant_evol_events() # print its orthology and paralogy relationships for ev in events: if ev.etype == "S": evts.write(",".join(ev.in_seqs)) evts.write("<===>") evts.write(",".join(ev.out_seqs)) evts.write("\n") elif ev.etype == "D":
name2score[name] = "Fuzzy:%0.2f" %sim for name in all_names: taxid = name2id.get(name, "???") realname = name2realname.get(name, name) score = name2score.get(name, "Exact:1.0") print "\t".join(map(str, [score, name, realname.capitalize(), taxid])) if args.taxid_file: all_taxids.extend(map(strip, open(args.taxid_file, "rU").read().split("\n"))) if args.taxid: all_taxids.extend(args.taxid) reftree = None if args.reftree: reftree = PhyloTree(args.reftree) all_taxids.extend(list(set([n.name for n in reftree.iter_leaves()]))) if all_taxids and args.info: log.info("Dumping %d taxid translations:" %len(all_taxids)) all_taxids = set(all_taxids) all_taxids.discard("") translator = get_taxid_translator(all_taxids) for taxid, name in translator.iteritems(): lineage = get_sp_lineage(taxid) named_lineage = ','.join(translate_to_names(lineage)) lineage = ','.join(map(str, lineage)) print "\t".join(map(str, [taxid, name, named_lineage, lineage ])) for notfound in all_taxids - set(str(k) for k in translator.iterkeys()): print >>sys.stderr, notfound, "NOT FOUND"
MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH """ iphylip_txt = """ 4 76 seqA MAEIPDETIQ QFMALT---H NIAVQYLSEF GDLNEALNSY YASQTDDIKD RREEAHQFMA seqB MAEIPDATIQ QFMALTNVSH NIAVQY--EF GDLNEALNSY YAYQTDDQKD RREEAHQFMA seqC MAEIPDATIQ ---ALTNVSH NIAVQYLSEF GDLNEALNSY YASQTDDQPD RREEAHQFMA seqD MAEAPDETIQ QFMALTNVSH NIAVQYLSEF GDLNEAL--- ---------- -REEAHQ--- LTNVSHQFMA LTNVSH LTNVSH---- ------ LTNVSH---- ------ -------FMA LTNVSH """ # Load a tree and link it to an alignment. As usual, 'alignment' can # be the path to a file or data in text format. t = PhyloTree("(((seqA,seqB),seqC),seqD);", alignment=fasta_txt, alg_format="fasta") #We can now access the sequence of every leaf node print "These are the nodes and its sequences:" for leaf in t.iter_leaves(): print leaf.name, leaf.sequence #seqD MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH #seqC MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH #seqA MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH #seqB MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH # # The associated alignment can be changed at any time t.link_to_alignment(alignment=iphylip_txt, alg_format="iphylip") # Let's check that sequences have changed print "These are the nodes and its re-linked sequences:" for leaf in t.iter_leaves():
''' layout for CodemlTree ''' if hasattr(node, "collapsed"): if node.collapsed == 1: node.img_style["draw_descendants"]= False if node.is_leaf(): if hasattr (node, "sequence"): seqface = MySequenceFace(node.sequence, "nt", fsize=10, col_w=11, interactive=True) faces.add_face_to_node(seqface, node, 1, aligned=True) if __name__ == "__main__": tree = PhyloTree('(Orangutan,Human,Chimp);') tree.link_to_alignment(""" >Chimp HARWLNEKLRCELRTLKKLGLDGYKAVSQYVKGRA >Orangutan DARWINEKLRCVSRTLKKLGLDGYKGVSQYVKGRP >Human DARWHNVKLRCELRTLKKLGLVGFKAVSQFVIRRA """) nt_sequences = {"Human" : "GACGCACGGTGGCACAACGTAAAATTAAGATGTGAATTGAGAACTCTGAAAAAATTGGGACTGGTCGGCTTCAAGGCAGTAAGTCAATTCGTAATACGTCGTGCG", "Chimp" : "CACGCCCGATGGCTCAACGAAAAGTTAAGATGCGAATTGAGAACTCTGAAAAAATTGGGACTGGACGGCTACAAGGCAGTAAGTCAGTACGTTAAAGGTCGTGCG", "Orangutan": "GATGCACGCTGGATCAACGAAAAGTTAAGATGCGTATCGAGAACTCTGAAAAAATTGGGACTGGACGGCTACAAGGGAGTAAGTCAATACGTTAAAGGTCGTCCG" } for l in nt_sequences: (tree & l).nt_sequence = nt_sequences[l] tree.dist = 0
from ete2 import PhyloTree # Creates a gene phylogeny with several duplication events at # different levels. Note that we are using the default method for # detecting the species code of leaves (three first lettes in the node # name are considered the species code). nw = """ ((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001) ,Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002)))); """ t = PhyloTree(nw) print "Original tree:", print t # # /-Dme_001 # /--------| # | \-Dme_002 # | # | /-Cfa_001 # | /--------| # | | \-Mms_001 # | | #--| | /-Hsa_001 # | | /--------| # | /--------| /--------| \-Hsa_003 # | | | | | # | | | /--------| \-Ptr_001 # | | | | | # | | | | \-Mmu_001 # | | \--------| # \--------| | /-Hsa_004 # | | /--------|
from ete2 import PhyloTree # Loads a gene tree and its corresponding species tree. Note that # species names in sptree are the 3 firs letters of leaf nodes in # genetree. gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));' species_tree_nw = "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);" genetree = PhyloTree(gene_tree_nw) sptree = PhyloTree(species_tree_nw) print genetree # /-Dme_001 # /--------| # | \-Dme_002 # | # | /-Cfa_001 # | /--------| #---------| | \-Mms_001 # | /--------| # | | | /-Hsa_001 # | | | /--------| # | | \--------| \-Ptr_001 # \--------| | # | \-Mmu_001 # | # | /-Ptr_002 # \--------| # | /-Hsa_002 # \--------| # \-Mmu_002 # # Let's reconcile our genetree with the species tree
from ete2 import PhyloTree # Reads a phylogenetic tree (using default species name encoding) t = PhyloTree("(((Hsa_001,Ptr_001),(Cfa_001,Mms_001)),(Dme_001,Dme_002));") # /-Hsa_001 # /--------| # | \-Ptr_001 # /--------| # | | /-Cfa_001 # | \--------| #---------| \-Mms_001 # | # | /-Dme_001 # \--------| # \-Dme_002 # # Prints current leaf names and species codes print "Deafult mode:" for n in t.get_leaves(): print "node:", n.name, "Species name:", n.species # node: Dme_001 Species name: Dme # node: Dme_002 Species name: Dme # node: Hsa_001 Species name: Hsa # node: Ptr_001 Species name: Ptr # node: Cfa_001 Species name: Cfa # node: Mms_001 Species name: Mms # # We can also use our own leaf name parsing function to obtain species # names. All we need to do is create a python function that takes # node's name as argument and return its corresponding species name. def get_species_name(node_name_string): # Species code is the first part of leaf name (separated by an
from Bio import SeqIO from Bio import motifs from Bio.Cluster import distancematrix from Bio.Cluster import clustercentroids records = list(SeqIO.parse("./txt/cm_perm_sequence_27_social.fasta", "fasta")) for seq_record in SeqIO.parse("./txt/cm_perm_sequence_27_social.fasta", "fasta"): print seq_record.id print repr(seq_record.seq) print len(seq_record) from Bio.Align.Applications import ClustalwCommandline clustalx = '/Applications/PhylogeneticAnalysis/clustalw2' cline = ClustalwCommandline(clustalx, infile="./txt/cm_perm_sequence_27_social.fasta") print cline stdout, stderr = cline() from Bio import Phylo tree = Phylo.read("./txt/cm_perm_sequence_27_social.dnd", "newick") tree.rooted = True #Phylo.draw(tree) from ete2 import Tree from ete2 import PhyloTree t = PhyloTree('./txt/cm_perm_sequence_27_social.dnd') t.link_to_alignment(alignment="./txt/cm_perm_sequence_27_social.fasta", alg_format="fasta") #from ete2 import ClusterTree #t = ClusterTree('./txt/cm_perm_sequence_27_social.dnd') t.show() #t.show("heatmap") #t.show("cluster_cbars") #t.show("cluster_bars") #t.show("cluster_lines")
### Add known sequences (adapters) with open(temp_file_name, 'a') as f: for name, seq in known_sequences.items(): f.write(">"+str(name)+"\n") f.write(seq+"\n") ### Align if verbose: print "aligning..." aln_file_name = os.path.splitext(temp_file_name)[0] + ".afa" align_muscle(temp_file_name, aln_file_name, gapopen=-1000.0) ### Build tree if verbose: print "building tree..." tree, aln = build_tree_FT(aln_file_name) ### Show in pretty format pretty_tree = PhyloTree(str(tree), alignment=aln_file_name, alg_format="fasta") pretty_tree.ladderize() ts = TreeStyle() pretty_tree.render(outfile, tree_style=ts) ### Clean up your mess os.remove(temp_file_name) os.remove(aln_file_name) ### TODO # highlight adapter rows # root on adapter?
import sys from collections import defaultdict from ete2 import PhyloTree if len(sys.argv) > 1: t = PhyloTree(sys.argv[1]) else: t = PhyloTree() #t.populate(5000, reuse_names=True, names_library=map(lambda x: "%03d" %x, range(100))) #t.populate(5000, reuse_names=True, names_library=["aaa", "bbb", "ccc","dddd"]) #t.set_species_naming_function(lambda x: x[:3]) #t = PhyloTree("((((Kla0008018:0.226825,(Kwa0003593:0.270871,(((((((Sce0006606:0.020101,(Smi0000169:0.045626,Sku0001100:0.091634)0.9:0.021336)0.473:0.004546,Spa0001368:0)0.806:0.040152,Sba0000063:0.059101)0.967:0.124536,Sca0004780:0.57162)0.36:0.045976,Cgl0005705:0.244154)0.94:0.080608,(((Spa0003632:0.005291,Sce0012358:0.019313)0.879:0.014349,Smi0005102:0.031246)0.028:0.000541,(Sba0002319:0.027948,Sku0001858:0.037758)0.873:0.023849)0.995:0.14497)0.859:0.056767,(Sca0004490:0.235469,Kpo0005032:0.313188)0.699:0.077825)0.807:0.085287)0.523:0.049374)0.606:0.167197,Ago0006484:0.438321)0.976:0.605273,Cal0012751:1.95721)0.975:0.332581,(Cal0010356:0.478947,((Ago0007434:1.13211,Kwa0002043:1.20443)0.282:0.216219,(Skl0001126:0.276168,Cgl0008719:0.5381)0.454:0.191735)0.934:0.438082)0.975:0.332581);") #t = PhyloTree("((((((AAA1, AAA2),((BBB1,BBB2), AAA3)D1),(CCC1,CCC2)), AAA8)D2, (((AAA5, AAA6),((BBB5,BBB6), AAA4)D3),(CCC3,CCC4)))D4, D);", format=1) t = PhyloTree("((((((((AAA1, AAA2:0.111)a1,(((BBB1,ZZZ1)a2,MMM1)a3,AAA4)a4)a5, AAA3)a6,(AAA4, (AAA5, XXX1)a8)a9)a10,DDD)a11,DDD)a12,DDD)a13,DDD)root;", format=1) print t.get_ascii() ntrees, ndups, sp_trees = t.get_speciation_trees(map_features=["dist"]) for sptree in sp_trees: print sptree.get_ascii(attributes=["dist"])
def __init__(self):
    """Start with an empty taxonomy lookup and a placeholder tree named "NoName"."""
    self.taxoDB = dict()
    # Name the fresh tree before publishing it on the instance.
    placeholder = PhyloTree()
    placeholder.name = "NoName"
    self.tree = placeholder
def test_ncbi_compare(self):
    """Annotate a small tree whose leaves are numeric ids using DATABASE_PATH.

    No assertion is made on the result; the test only checks that the
    annotation call completes without raising.
    """
    def name_attribute(item):
        # The species label is taken from the ``name`` attribute of
        # whatever PhyloTree hands to the naming function.
        return item.name

    newick = "((9606, (9598, 9606)), 10090);"
    t = PhyloTree(newick, sp_naming_function=name_attribute)
    t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
#!/usr/bin/python from __future__ import absolute_import import sys from ete2 import PhyloTree if __name__ == "__main__": t = sys.argv[1] s = sys.argv[2] out = sys.argv[3] pt = PhyloTree(t) # pt.link_to_alignment(alignment=s) pt.render(out)
"delta.plot": "delta_plot", "dist.dna": "dist_dna", "dist.nodes": "dist_nodes", "node.depth": "node_depth", "node.depth.edgelength": "node_depth_edgelength", "node.height": "node_height", "node.height.clado": "node_height_clado", "prop.part": "prop_part" } ape = importr("ape", robject_translations=ape_objects) # Required for phangorn ph = importr("phangorn") # Phylogenetic operations in R print "All modules imported successfully" t = PhyloTree(intree, alignment=seq, alg_format=seq_format) # Main tree containing entire sequence dtp = PhyloTree(intree) # Dummy tree for phenotype shuffling print "Tree file read successfully" phenfile = open(phen, "r") # Phenotype file phenlist = [] for line in phenfile.readlines(): phenlist.append([line.split("\t")[0].strip(), line.split("\t")[1].strip()]) phenfile.close() phenotype = { } # Dictionary containing species names and their phenotype values # Phenotype file should have two columns separated by tab containing taxa name # in the first column and a numerical phenotype value in the second
ts = TreeStyle() # ts.mode = "c" for i in open(in_id,"r").readlines(): i=i.strip('\n') print i outf = "/".join(in_id.split('/')[:-1])+"/top_hits_pm1_madss/"+i+"_blastp_hits_"+in_eval+".fasta" no_hits = blast_gene(i,in_eval,indb,outf) print no_hits align_args = "/usr/local/bin/megacc -a "+ align_mao +" -o "+align_dir+" -s -d " + outf subprocess.Popen(align_args, shell=True).wait() sl(2) align_lis = glob.glob(align_dir + "/*.meg") alignpath = '' for j in align_lis: if i in j: tree_args = "/usr/local/bin/megacc -a "+ tree_mao +" -o "+tree_dir+" -d " + j subprocess.Popen(tree_args, shell=True).wait() tree_ls = glob.glob(tree_dir + "/*.nwk") for j in tree_ls: if i in j and "consensus" not in j: t = PhyloTree(j, format=1) #t.show() # t = Phylo.read(j,"newick") # #t.ladderize() # #Phylo.draw(t) # Phylo.write(t,j.replace(".nwk",".xml"),"phyloxml") # Phylo.draw_graphviz(t,prog="neato") t.render(tree_dir+"/"+i+"_blastp_hits_"+in_eval+".pdf",tree_style=ts,dpi=200)
__author__ = 'mjohnpayne' import sys from ete2 import Tree, faces, AttrFace, TreeStyle, NodeStyle, PhyloTree, PieChartFace import math # infile = open('/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree.nwk','r') # outfile = open('/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree_nos_only.nwk','w') # infile = infile.read() t = PhyloTree( '/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree.nwk', format=1) # ts = TreeStyle() # # t.show(tree_style=ts) for node in t: #.iter_search_nodes(): # name = node.name # name = name[name.find("_")+1:] # node.name = name print node.name if node.name == "41": node.dist = 5e-05 # t.write(outfile='/Volumes/MP_HD/CI_GENOME_SEQ/CI_gene_coverage (generate stat for sig diff cov)/gene_copy_no_tree/CI_node_assignments_tree_nos.nwk',format=1,) ts = TreeStyle() t.show(tree_style=ts)
def get_topology(self, taxids, intermediate_nodes=False, rank_limit=None,
                 collapse_subspecies=False, annotate=True):
    """Given a list of taxid numbers, return the minimal pruned NCBI
    taxonomy tree containing all of them.

    :param taxids: iterable of taxids. Each one is passed to
      ``self.get_lineage()`` and used as a dictionary key, so it must be
      hashable.

    :param False intermediate_nodes: If True, single child nodes
      representing the complete lineage of leaf nodes are kept.
      Otherwise, the tree is pruned to contain the first common
      ancestor of each group.

    :param None rank_limit: If valid NCBI rank name is provided, the
      tree is pruned at that given level. For instance, use
      rank_limit="species" to get rid of sub-species or strain leaf
      nodes.

    :param False collapse_subspecies: If True, any item under the
      species rank will be collapsed into the species upper node.

    :param True annotate: If True, the resulting tree is passed to
      ``self.annotate_tree()`` before being returned.

    :returns: the pruned tree (a PhyloTree node).

    :raises KeyError: if taxid 1 does not appear in any of the requested
      lineages (for instance when ``taxids`` is empty).
    """
    from ete2 import PhyloTree

    # Remember the requested taxids in a set: membership is tested once per
    # node further down, and ``taxids`` may be a list or a one-shot iterable.
    requested = set()
    sp2track = {}
    elem2node = {}
    for sp in taxids:
        requested.add(sp)
        track = []
        lineage = self.get_lineage(sp)
        id2rank = self.get_rank(lineage)

        for elem in lineage:
            if elem not in elem2node:
                # First time this taxid is seen: create its node.
                node = elem2node[elem] = PhyloTree()
                node.name = str(elem)
                node.taxid = elem
                node.add_feature("rank", str(id2rank.get(int(elem), "no rank")))
            else:
                node = elem2node[elem]
            track.append(node)
        sp2track[sp] = track

    # generate parent child relationships
    for track in sp2track.itervalues():
        parent = None
        for elem in track:
            if parent and elem not in parent.children:
                parent.add_child(elem)
            # Stop descending this lineage once the requested rank is reached.
            if rank_limit and elem.rank == rank_limit:
                break
            parent = elem
    root = elem2node[1]

    # remove one-child nodes, unless they were explicitly requested
    if not intermediate_nodes:
        for n in root.get_descendants():
            if len(n.children) == 1 and int(n.name) not in requested:
                n.delete(prevent_nondicotomic=False)

    # A root with a single child adds nothing: return the child instead.
    if len(root.children) == 1:
        tree = root.children[0].detach()
    else:
        tree = root

    if collapse_subspecies:
        # Collect first and detach afterwards, so the tree is not modified
        # while it is being traversed.
        to_detach = []
        for node in tree.traverse():
            if node.rank == "species":
                to_detach.extend(node.children)
        for n in to_detach:
            n.detach()

    if annotate:
        self.annotate_tree(tree)

    return tree
realname = name2realname.get(name, name) score = name2score.get(name, "Exact:1.0") print "\t".join( map(str, [score, name, realname.capitalize(), taxid])) if args.taxid_file: all_taxids.extend( map(strip, open(args.taxid_file, "rU").read().split("\n"))) if args.taxid: all_taxids.extend(args.taxid) reftree = None if args.reftree: reftree = PhyloTree(args.reftree) all_taxids.extend(list(set([n.name for n in reftree.iter_leaves()]))) if all_taxids and args.info: log.info("Dumping %d taxid translations:" % len(all_taxids)) all_taxids = set(all_taxids) all_taxids.discard("") translator = get_taxid_translator(all_taxids) for taxid, name in translator.iteritems(): lineage = get_sp_lineage(taxid) named_lineage = ','.join(translate_to_names(lineage)) lineage = ','.join(map(str, lineage)) print "\t".join(map(str, [taxid, name, named_lineage, lineage])) for notfound in all_taxids - set( str(k) for k in translator.iterkeys()): print >> sys.stderr, notfound, "NOT FOUND"
#!/usr/bin/python ####Python script that can take in a gene tree and detect paralogs and duplications; uses python module ete2 ###linh c 6/5/2014 import sys # Import ete2 module from ete2 import PhyloTree # Take in file input file = sys.argv[1] output = sys.argv[2] # Load a tree structure from a newick file. t = PhyloTree(file) print t # Alternatively, you can scan the whole tree topology events = t.get_descendant_evol_events() # Open output file fo = open(output, "wb+") # Print its orthology and paralogy relationships fo.write( 'Events detected from the root of the tree,' + file + '\n') for ev in events: if ev.etype == "S": fo.write ('ORTHOLOGY RELATIONSHIP:' + ','.join(ev.in_seqs) + '<====>' + ','.join(ev.out_seqs) + '\n') elif ev.etype == "D":
exit() # create output directory events_dir = '%s' '/events' % (str(sys.argv[1])) if not os.path.exists(events_dir): os.makedirs(events_dir) # get tree files from directory tree_files = os.listdir(str(sys.argv[1])) for tree in tree_files: if re.match(r"^\d+\.fna\.aln.+\.tree", tree): # get orthogroup id ortho = re.sub(r'\D', "", tree) # load newick tree #print(tree) t = PhyloTree(tree) #print(t) evts = file('%s' '/' '%s' '.temp' % (events_dir, tree), "w") ########################################################################################## # evolutionary events involving all taxa ########################################################################################## # Alternatively, you can scan the whole tree topology events = t.get_descendant_evol_events() # print its orthology and paralogy relationships for ev in events: if ev.etype == "S": evts.write(",".join(ev.in_seqs)) evts.write("<===>") evts.write(",".join(ev.out_seqs)) evts.write("\n") elif ev.etype == "D":
def run(args):
    """Annotate each source tree with features read from tab-separated files.

    For every newick in ``args.src_tree_iterator`` a tree is built (a
    PhyloTree annotated through ``annotate_ncbi_taxa(args.taxid_attr)`` when
    ``args.ncbi`` is set, a plain Tree otherwise). Each entry of
    ``args.feature`` is an iterable of ``key:value`` fields, where key is one
    of ``name``, ``source``, ``multiple`` or ``type``:

    * ``name``   -- feature name to add to the target nodes (required)
    * ``source`` -- path of the annotation file (required)
    * ``type``   -- one of str, int, float, set, list (default str)
    * ``multiple`` -- accepted, but currently has no effect

    Every non-empty, non-comment line of the source file must hold
    ``nodenames<TAB>value``. ``nodenames`` is a comma-separated list: a
    single name targets that node, several names target their common
    ancestor. The annotated tree is finally passed to ``dump()``.

    :raises ValueError: on a malformed or unknown field, an unknown type,
        or when ``name`` or ``source`` is missing from an annotation.
    """
    from ete2 import Tree, PhyloTree

    # Loop-invariant lookup table for the "type" option.
    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list,
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    key, value = map(strip, field.split(":"))
                except Exception:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Parsed so the option is accepted; not used yet.
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are mandatory. The error used to be built but
            # never raised, and only fired when both were missing.
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % (annotation,))

            features.add(aname)
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue

                    nodenames, attr_value = map(strip, line.split('\t'))
                    nodenames = [strip(n) for n in nodenames.split(',')]

                    # A leading '!' on the first name switches off relaxed
                    # grouping; the flag is not acted upon yet.
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass  # do something
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): the message says "Overwriting", but an
                    # existing attribute is left untouched; only missing
                    # ones are added. Confirm the intended behaviour.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' % nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)