def test_shortcut_functions(self):
    """Check the pattern shortcut functions on an event-annotated tree.

    A small gene tree is annotated with evolutionary events and then
    searched with three patterns built on ``n_duplications``,
    ``contains_leaves`` and ``n_speciations``. The number of hits and the
    identity of the first hits are compared with the expected values.
    """
    tree = PhyloTree(
        """((((Human_1, Chimp_1), (Human_2, (Chimp_2, Chimp_3))), ((Fish_1, (Human_3, Fish_3)), Yeast_2)), Yeast_1);""")
    tree.set_species_naming_function(lambda node: node.name.split("_")[0])
    tree.get_descendant_evol_events()  # DDDSSSDDS
    root = tree.get_tree_root()

    expressions = (
        # Detects two consecutive nodes with duplications
        """('n_duplications(@) > 0')'n_duplications(@) > 0 '; """,
        """( 'contains_leaves(@, ["Chimp_2", "Chimp_3"])'); """,
        """'n_speciations(@) > 3 '; """,
    )

    # Compile every pattern first, then run the searches, in that order.
    compiled = [TreePattern(expression) for expression in expressions]
    duplication_hits, chimp_hits, speciation_hits = [
        list(pattern.find_match(tree, maxhits=None)) for pattern in compiled
    ]

    self.assertEqual(len(duplication_hits), 5)

    self.assertEqual(len(chimp_hits), 4)
    self.assertEqual(chimp_hits[0], root)

    self.assertEqual(len(speciation_hits), 2)
    self.assertEqual(speciation_hits[0], root)
    self.assertEqual(speciation_hits[1], root.children[0])
def run(args):
    """Print the sequences involved in each evolutionary event of every tree.

    Trees are read from ``args.src_tree_iterator``. A tree is only loaded
    and reported when ``args.orthologs`` is not ``None``; for each of its
    events the ``in_seqs`` and ``out_seqs`` attributes are printed.
    """
    from ete3 import Tree, PhyloTree

    for newick in args.src_tree_iterator:
        if args.orthologs is None:
            continue

        tree = PhyloTree(newick)
        for event in tree.get_descendant_evol_events():
            print(event.in_seqs, event.out_seqs)
def run(args):
    """Report the in/out sequences of the evolutionary events of each tree.

    Every item yielded by ``args.src_tree_iterator`` is consumed. When
    ``args.orthologs`` is not ``None`` the item is loaded as a ``PhyloTree``
    and ``in_seqs`` / ``out_seqs`` of each detected event are printed.
    """
    from ete3 import Tree, PhyloTree

    for newick in args.src_tree_iterator:
        if args.orthologs is None:
            continue

        tree = PhyloTree(newick)
        for event in tree.get_descendant_evol_events():
            print(event.in_seqs, event.out_seqs)
# Copy the species-name parsing options from the command-line arguments.
# NOTE(review): presumably these are settings read by extract_spname —
# confirm where they are declared, since the enclosing scope is not visible.
SPECIES_NAME_POS = args.species_field
SPECIES_NAME_DELIMITER = args.species_delimiter

# load a phylomeDB Tree provided as a newick file in the command line
t = PhyloTree(newick, sp_naming_function=extract_spname)

# Optional re-rooting: with several root entries their common ancestor is
# used as outgroup; with a single entry the node is looked up with `&`
# (presumably a lookup by name — confirm against the tree API).
if args.root:
    if len(args.root) > 1:
        outgroup = t.get_common_ancestor(args.root)
    else:
        outgroup = t & args.root[0]
    t.set_outgroup(outgroup)

if not args.skip_ortholog_detection:
    # detect speciation and duplication events using the species overlap
    # algorithm used in phylomeDB
    t.get_descendant_evol_events()

# Optional text rendering showing the event-type attribute and node names.
if args.ascii:
    print(
        t.get_ascii(attributes=[args.evoltype_attr, "name"],
                    show_internal=True))

# Optional newick dump carrying the event-type attribute as a feature.
if args.newick:
    print(t.write(features=[args.evoltype_attr], format_root_node=True))

if args.show:
    t.show()

# Export the tree through export_as_orthoXML using the chosen database
# and event-type attribute.
export_as_orthoXML(t, args.database, args.evoltype_attr)
# Assign species names to the gene tree leaves and list them.
tphy.set_species_naming_function(get_species_name)
for n in tphy.get_leaves():
    print("node:", n.name, "Species name:", n.species)

# In[]:
# find evolutionary events using tree reconciliation
# (tsps is defined outside this fragment; presumably the species tree)
tree_rec, evev_rec = tphy.reconcile(tsps)

# In[]:
print(tree_rec)
tree_rec.show()

# In[]:
# find evolutionary events using species overlap
evev = tphy.get_descendant_evol_events()

# For speciation events, print the event's orthologs once for every
# in-sequence whose name starts with "Lepsal".
for ev in evev:
    if ev.etype == "S":
        for s in ev.in_seqs:
            if s.startswith("Lepsal"):
                print(ev.orthologs)

# fseqs keeps only the sequences whose name starts with "Drer" or "Hsap".
fseqs = lambda slist: [
    s for s in slist if s.startswith("Drer") or s.startswith("Hsap")
]

# NOTE(review): the heading below mentions orthology among Anogam and Anosin,
# but the loop prints duplication ("D") events filtered to Drer/Hsap —
# confirm which wording is intended.
print("\nOrthology relationships among Anogam and Anosin")
for ev in evev:
    if ev.etype == "D":
        print('Paralog: ', ','.join(fseqs(ev.in_seqs)), "<====>",
              ','.join(fseqs(ev.out_seqs)))
from ete3 import PhyloTree # read tree from file phy = PhyloTree("adar_hol.01.iqt.contree.newick") # assign species names to tree phy.set_species_naming_function(lambda node: node.name.split("_")[0]) for n in phy.get_leaves(): print("node:", n.name, "Species name:", n.species) # root tree phy_outgroup = phy.get_midpoint_outgroup() phy.set_outgroup(phy_outgroup) # find evolutionary events evev = phy.get_descendant_evol_events(sos_thr=0.9) for ev in evev: if ev.etype == "S": print(ev.orthologs) # find evolutionary events evev = phy.get_descendant_evol_events(sos_thr=0.9) # all events for ev in evev: print(ev.etype, ','.join(ev.in_seqs), "<====>", ','.join(ev.out_seqs)) # all events involving either Hsap or Drer fseqs = lambda slist: [ s for s in slist if s.startswith("Drer") or s.startswith("Hsap")
# To obtain all the evolutionary events involving a given leaf node we # use get_my_evol_events method matches = t.search_nodes(name="Hsa_001") human_seq = matches[0] # Obtains its evolutionary events events = human_seq.get_my_evol_events() # Print its orthology and paralogy relationships print "Events detected that involve Hsa_001:" for ev in events: if ev.etype == "S": print ' ORTHOLOGY RELATIONSHIP:', ','.join(ev.in_seqs), "<====>", ','.join(ev.out_seqs) elif ev.etype == "D": print ' PARALOGY RELATIONSHIP:', ','.join(ev.in_seqs), "<====>", ','.join(ev.out_seqs) # Alternatively, you can scan the whole tree topology events = t.get_descendant_evol_events() # Print its orthology and paralogy relationships print "Events detected from the root of the tree" for ev in events: if ev.etype == "S": print ' ORTHOLOGY RELATIONSHIP:', ','.join(ev.in_seqs), "<====>", ','.join(ev.out_seqs) elif ev.etype == "D": print ' PARALOGY RELATIONSHIP:', ','.join(ev.in_seqs), "<====>", ','.join(ev.out_seqs) # If we are only interested in the orthology and paralogy relationship # among a given set of species, we can filter the list of sequences # # fseqs is a function that, given a list of sequences, returns only # those from human and mouse fseqs = lambda slist: [s for s in slist if s.startswith("Hsa") or s.startswith("Mms")] print "Paralogy relationships among human and mouse"
def process_tree(treepath):
    """Extract orthology relationships between the target taxid and other species.

    The tree is loaded, every leaf is annotated with ``taxid`` and
    ``good_name``, clades made only of the target species are optionally
    collapsed, the tree is midpoint-rooted, and each speciation event that
    involves the target taxid is reported per orthologous species.

    Relies on names defined outside this function: ``get_species``,
    ``conversion``, ``names``, ``target_taxid`` and ``collapse``.

    Args:
        treepath: Tree source handed to ``PhyloTree`` after ``str()``
            conversion and right-stripping.

    Returns:
        A list of report lines. Each line joins, with tabs: the sorted
        target-species sequences, the orthology type (``one-to-one``,
        ``one-to-many``, ``many-to-one`` or ``many-to-many``), the species
        taxid, the species name, the sorted orthologs and a final newline
        field. ``None`` is returned when rooting fails on a single-leaf tree.

    Raises:
        Exception: whatever ``set_outgroup`` raised, when rooting fails on a
            tree with more than one leaf.
    """
    treepath = str(treepath).rstrip()
    t = PhyloTree(treepath, sp_naming_function=get_species)

    # Annotate leaves: the taxid is the integer prefix of the leaf name
    # (text before the first dot), stored as a string.
    for leaf in t:
        leaf.taxid = str(int(leaf.name.split(".", 1)[0]))

        # Prefer the converted name when the conversion table has one.
        try:
            good_name = "%s" % (conversion[leaf.name][0])
        except Exception:
            good_name = leaf.name

        # Replace characters that would clash with newick/report syntax.
        # Raw string: avoids the invalid "\]" / "\[" string escapes.
        leaf.good_name = re.sub(r"[ |\t,:)(;\n\]\[]+", "_", good_name)

    t.dist = 0

    # Collapse clades that contain only the target species.
    node2content = t.get_cached_content()
    target_species = {target_taxid}

    def is_sp_specific(_node):
        """Return True when all leaves under *_node* are target species."""
        return {_leaf.species for _leaf in node2content[_node]} <= target_species

    if collapse == 'yes':
        for n in t.get_leaves(is_leaf_fn=is_sp_specific):
            if n.children:
                for ch in n.get_children():
                    ch.detach()
                # Stored as a string, like every other leaf taxid, so the
                # str(target_taxid) comparisons below also match this node.
                n.taxid = str(target_taxid)
                n.name = "%s" % ('|'.join(
                    [_lf.name for _lf in node2content[n]]))
                n.good_name = "{%s}" % ('|'.join(
                    [_lf.good_name for _lf in node2content[n]]))

    # Root the tree at its midpoint outgroup.
    outgroup = t.get_midpoint_outgroup()
    try:
        t.set_outgroup(outgroup)
    except Exception:
        # A single-leaf tree cannot be rooted and has no events to report.
        if len(t) == 1:
            return None
        raise

    # The topology changed (collapse / rooting), so rebuild the cache.
    node2content = t.get_cached_content()
    target = str(target_taxid)
    event_lines = []

    for ev in t.get_descendant_evol_events():
        if ev.etype != "S":
            continue

        source_seqs = node2content[ev.node.children[0]]
        ortho_seqs = node2content[ev.node.children[1]]

        # Make source_seqs the side holding the target species; skip the
        # event when neither side contains it.
        if target not in {leaf.taxid for leaf in source_seqs}:
            if target not in {leaf.taxid for leaf in ortho_seqs}:
                continue
            source_seqs, ortho_seqs = ortho_seqs, source_seqs

        # Target-species sequences on the source side.
        co_orthologs = sorted(
            leaf.good_name for leaf in source_seqs if leaf.taxid == target)

        # Sequences on the other side, grouped by species taxid.
        orthologs = defaultdict(set)
        for leaf in ortho_seqs:
            orthologs[int(leaf.taxid)].add(leaf.good_name)

        _otype = "one-to-" if len(co_orthologs) == 1 else "many-to-"

        # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        for sp, orth in orthologs.items():
            otype = _otype + ("one" if len(orth) == 1 else "many")
            event_lines.append('\t'.join([
                ','.join(co_orthologs), otype,
                str(sp), names[sp], ','.join(sorted(orth)), '\n'
            ]))

    return event_lines
def process_tree(treepath):
    """Extract orthology relationships from the speciation events of a tree.

    The tree is loaded and midpoint-rooted, leaves are annotated with their
    taxid (and converted name when ``args.conv_table`` is set), clades made
    only of the target species are optionally collapsed, and every
    speciation event is reported per orthologous species. When
    ``target_taxid`` is set, only events involving it are reported and the
    side containing it is used as the source side.

    Relies on names defined outside this function: ``args``, ``ncbi``,
    ``conversion``, ``get_species``, ``target_taxid`` and ``collapse``.

    Args:
        treepath: Tree source handed to ``PhyloTree`` after ``str()``
            conversion and right-stripping; its basename is written into
            every report line.

    Returns:
        ``event_lines`` when ``args.pairs_table`` is false, otherwise the
        tuple ``(event_lines, pairs)``. Each event line joins, with tabs:
        the sorted source sequences, the orthology type, the species id,
        the sorted orthologs, the tree file name, the species name and a
        final newline field. ``pairs`` holds every (source, ortholog) name
        pair of every reported event. If rooting fails, ``[]`` is returned
        (or ``([], [])`` for a single-leaf tree in pairs mode).
    """
    treepath = str(treepath).rstrip()
    t = PhyloTree(treepath, sp_naming_function=get_species)
    treefile = os.path.basename(treepath)

    t.dist = 0
    outgroup = t.get_midpoint_outgroup()
    try:
        t.set_outgroup(outgroup)
        t.standardize()
    except Exception:
        single_leaf = len(t) == 1
        sys.stderr.write(
            treefile + ('len(t) == 1' if single_leaf else 'len(t) != 1') + '\n')
        if not args.pairs_table:
            return []
        if single_leaf:
            return ([], [])
        # Pairs mode, several leaves: fall back to rooting on the first leaf.
        t.set_outgroup(t.get_leaf_names()[0])

    # Annotate leaves with their taxid (name prefix before the first dot)
    # and collect the scientific name of every taxid seen.
    names = {}
    for leaf in t:
        sp = str(leaf.name.split('.')[0])
        leaf.taxid = sp
        try:
            names[sp] = ncbi.get_taxid_translator([sp])[int(sp)]
        except Exception:
            # Name lookup failed for this taxid: report an empty name.
            names[sp] = ''

        if args.conv_table:
            try:
                good_name = "%s" % (conversion[leaf.name][0])
            except Exception:
                good_name = leaf.name
            leaf.good_name = good_name

    # Collapse clades that contain only the target species.
    node2content = t.get_cached_content()
    target_species = {target_taxid}

    def is_sp_specific(_node):
        """Return True when all leaves under *_node* are target species."""
        return {_leaf.species for _leaf in node2content[_node]} <= target_species

    if collapse == 'yes':
        for n in t.get_leaves(is_leaf_fn=is_sp_specific):
            if n.children:
                for ch in n.get_children():
                    ch.detach()
                # Stored as a string, like every other leaf taxid, so the
                # str(target_taxid) comparisons below also match this node.
                n.taxid = str(target_taxid)
                n.name = "{%s}" % ('|'.join(
                    [_lf.name for _lf in node2content[n]]))
                if args.conv_table:
                    n.good_name = "{%s}" % ('|'.join(
                        [_lf.good_name for _lf in node2content[n]]))

    def label(leaf):
        """Return the name used for *leaf* in the report."""
        return leaf.good_name if args.conv_table else leaf.name

    target = str(target_taxid)
    event_lines = []
    all_ortholgs_pairs = []

    for ev in t.get_descendant_evol_events():
        if ev.etype != "S":
            continue

        source_seqs = ev.node.children[0]
        ortho_seqs = ev.node.children[1]

        if target_taxid:
            # Make source_seqs the side holding the target species; skip
            # the event when neither side contains it.
            if target not in {leaf.taxid for leaf in source_seqs}:
                if target not in {leaf.taxid for leaf in ortho_seqs}:
                    continue
                source_seqs, ortho_seqs = ortho_seqs, source_seqs

        # Leaf names of both sides, in tree iteration order.
        source_names = [label(leaf) for leaf in source_seqs]
        ortho_names = [label(leaf) for leaf in ortho_seqs]
        co_orthologs = sorted(source_names)

        # Sequences on the other side, grouped by species id.
        orthologs = defaultdict(set)
        for leaf in ortho_seqs:
            orthologs[str(leaf.name.split('.')[0])].add(label(leaf))

        _otype = "one-to-" if len(source_seqs) == 1 else "many-to-"

        for sp, orth in orthologs.items():
            otype = _otype + ("one" if len(orth) == 1 else "many")
            event_lines.append('\t'.join([
                ','.join(co_orthologs), otype,
                str(sp), ','.join(sorted(orth)), treefile, names[sp], '\n'
            ]))

        if args.pairs_table:
            # Every source sequence paired with every ortholog of the event.
            all_ortholgs_pairs.extend(
                itertools.product(source_names, ortho_names))

    if args.pairs_table:
        return (event_lines, all_ortholgs_pairs)
    return event_lines