def get_example_tree():
    """Build a demo gene tree decorated with sequence faces.

    The tree is parsed from a hard-coded newick string and linked to the
    module-level alignment ``alg``.

    * Internal nodes get one ``SeqMotifFace`` per sub-sequence returned by
      ``mutation_columns`` (placed at "branch-bottom") and one labelled
      ``RectFace`` per relevant column index (placed at "branch-top").
    * Leaves get their whole sequence as an "aligned" ``SeqMotifFace``.
    * The aligned header receives one labelled ``RectFace`` per alignment
      column.

    Returns:
        A ``(tree, tree_style)`` tuple.
    """

    def _column_face(text):
        # Small borderless box that only carries a text label.
        return RectFace(10, 10, fgcolor=None, bgcolor=None,
                        label={"text": str(text), "fonttype": "Courier",
                               "color": "black", "fontsize": 6})

    gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'
    t = PhyloTree(gene_tree_nw)
    ts = TreeStyle()
    # disable default PhyloTree Layout
    ts.layout_fn = lambda x: True

    t.link_to_alignment(alg)
    node2content = t.get_cached_content()

    # Width of the aligned header. Collected from the leaves while traversing
    # instead of reading a list-comprehension variable after the loop (that
    # variable is not visible outside the comprehension in Python 3).
    alg_length = 0

    for node in t.traverse():
        node.img_style["size"] = 0

        if node.is_leaf():
            f = SeqMotifFace(node.sequence, seq_format="seq", width=6)
            node.add_face(f, column=0, position="aligned")
            alg_length = max(alg_length, len(node.sequence))
            continue

        leaves = node2content[node]
        # get columns with different aa
        subseqs, relevant_columns = mutation_columns(
            [lf.sequence for lf in leaves])

        for seq in subseqs:
            f = SeqMotifFace(seq, seq_format="seq", width=10, height=8)
            f.margin_top = 2
            f.margin_right = 6
            node.add_face(f, column=0, position="branch-bottom")

        for j, col in enumerate(relevant_columns):
            col_f = _column_face(col)
            node.add_face(col_f, column=j, position="branch-top")
            col_f.margin_bottom = 2

    ts.draw_aligned_faces_as_table = False
    for colnum in range(alg_length):
        ts.aligned_header.add_face(_column_face(colnum), column=colnum)

    return t, ts
def process_tree(treepath):
    """Extract orthology relationships between the target taxid and other species.

    The tree at ``treepath`` is loaded as a ``PhyloTree``, optionally collapsed
    so that clades made only of the target species become a single leaf,
    rooted at its midpoint outgroup, and scanned for speciation ("S") events
    that involve the target taxid.

    Relies on module-level names defined outside this function:
    ``get_species``, ``conversion``, ``names``, ``target_taxid`` and
    ``collapse``.

    Args:
        treepath: Path (or newick source) handed to ``PhyloTree``; it is
            converted with ``str()`` and right-stripped first.

    Returns:
        A list of tab-joined strings, one per (speciation event, ortholog
        species) pair, with the fields: comma-joined sorted target sequence
        names, orthology type ("one-to-one", "one-to-many", "many-to-one" or
        "many-to-many"), species taxid, ``names[taxid]``, comma-joined sorted
        ortholog names, and a final ``'\\n'`` field.
        ``None`` is returned when rooting fails on a single-leaf tree.

    Raises:
        Exception: whatever ``set_outgroup`` raised, when rooting fails on a
            tree with more than one leaf.
        KeyError: if an ortholog taxid is missing from ``names``.
    """
    treepath = str(treepath).rstrip()
    t = PhyloTree(treepath, sp_naming_function=get_species)

    # Annotate every leaf with its taxid (the leaf-name prefix before the
    # first ".") and a sanitised display name.
    for leaf in t:
        tax = int(leaf.name.split(".", 1)[0])
        leaf.taxid = str(tax)

        # Prefer the converted name; fall back to the raw leaf name when the
        # conversion lookup fails for any reason (best-effort).
        try:
            good_name = "%s" % (conversion[leaf.name][0])
        except Exception:
            good_name = leaf.name
        leaf.good_name = re.sub(r"[ |\t,:)(;\n\]\[]+", "_", good_name)

    t.dist = 0

    # Collapse clades that contain only the target species.
    node2content = t.get_cached_content()
    target_species = {target_taxid}

    def is_sp_specific(_node):
        _species = set(_leaf.species for _leaf in node2content[_node])
        return not (_species - target_species)

    if collapse == 'yes':
        for n in t.get_leaves(is_leaf_fn=is_sp_specific):
            if n.children:
                for ch in n.get_children():
                    ch.detach()
                # Stored as str, like the per-leaf taxid assigned above, so
                # the str(target_taxid) comparisons below recognise it.
                n.taxid = str(target_taxid)
                n.name = "%s" % ('|'.join(
                    [_lf.name for _lf in node2content[n]]))
                n.good_name = "{%s}" % ('|'.join(
                    [_lf.good_name for _lf in node2content[n]]))

    # Root the tree at its midpoint outgroup.
    outgroup = t.get_midpoint_outgroup()
    try:
        t.set_outgroup(outgroup)
    except Exception:
        if len(t) == 1:
            # Nothing to report for a single-leaf tree.
            return None
        raise

    # Topology changed (collapse + rooting): refresh the leaf cache.
    node2content = t.get_cached_content()
    target = str(target_taxid)

    event_lines = []
    for ev in t.get_descendant_evol_events():
        if ev.etype != "S":
            continue

        first = node2content[ev.node.children[0]]
        second = node2content[ev.node.children[1]]

        # The side holding the target taxid is the source; skip events that
        # do not involve the target at all.
        if any(leaf.taxid == target for leaf in first):
            source_seqs, ortho_seqs = first, second
        elif any(leaf.taxid == target for leaf in second):
            source_seqs, ortho_seqs = second, first
        else:
            continue

        # Target-species sequences on the source side.
        co_orthologs = sorted(
            leaf.good_name for leaf in source_seqs if leaf.taxid == target)

        # Ortholog names grouped by (integer) species taxid.
        orthologs = defaultdict(set)
        for leaf in ortho_seqs:
            orthologs[int(leaf.taxid)].add(leaf.good_name)

        _otype = "one-to-" if len(co_orthologs) == 1 else "many-to-"

        for sp, orth in orthologs.items():
            otype = _otype + ("one" if len(orth) == 1 else "many")
            event_lines.append('\t'.join([
                ','.join(co_orthologs), otype,
                str(sp), names[sp], ','.join(sorted(orth)), '\n'
            ]))

    return event_lines
def process_tree(treepath):
    """Extract orthology relationships from the speciation events of a tree.

    The tree at ``treepath`` is loaded as a ``PhyloTree``, rooted at its
    midpoint outgroup and standardized, optionally collapsed so that clades
    made only of the target species become a single leaf, and then scanned
    for speciation ("S") events.

    Relies on module-level names defined outside this function: ``args``
    (``pairs_table`` and ``conv_table`` are read), ``get_species``, ``ncbi``,
    ``conversion``, ``target_taxid`` and ``collapse``.

    Args:
        treepath: Path (or newick source) handed to ``PhyloTree``; it is
            converted with ``str()`` and right-stripped first.

    Returns:
        ``event_lines`` when ``args.pairs_table`` is falsy, otherwise the
        tuple ``(event_lines, all_ortholgs_pairs)``.

        ``event_lines`` is a list of tab-joined strings, one per (speciation
        event, ortholog species prefix) pair, with the fields: comma-joined
        sorted source names, orthology type, species prefix, comma-joined
        sorted ortholog names, tree file name, scientific name (``''`` when
        unknown) and a final ``'\\n'`` field.

        ``all_ortholgs_pairs`` is a list of ``(source_name, ortholog_name)``
        tuples covering every source/ortholog combination of every event.

        When rooting fails, ``[]`` is returned without ``args.pairs_table``;
        with it, ``([], [])`` is returned for a single-leaf tree and larger
        trees are re-rooted on their first leaf instead.
    """
    treepath = str(treepath).rstrip()
    t = PhyloTree(treepath, sp_naming_function=get_species)
    treefile = os.path.basename(treepath)
    use_good_names = args.conv_table

    t.dist = 0
    outgroup = t.get_midpoint_outgroup()
    try:
        t.set_outgroup(outgroup)
        t.standardize()
    except Exception:
        single_leaf = len(t) == 1
        if single_leaf:
            sys.stderr.write(treefile + 'len(t) == 1' + '\n')
        else:
            sys.stderr.write(treefile + 'len(t) != 1' + '\n')

        if not args.pairs_table:
            return []
        if single_leaf:
            return ([], [])
        # Pairs mode keeps going: fall back to rooting on the first leaf.
        t.set_outgroup(t.get_leaf_names()[0])

    # Annotate leaves with their taxid (leaf-name prefix before the first
    # ".") and look up scientific names.
    names = {}
    for leaf in t:
        # Parsed outside the try so `sp` is always bound for the fallback.
        sp = str(leaf.name.split('.')[0])
        leaf.taxid = sp
        try:
            sci_name = ncbi.get_taxid_translator([sp])
            names[sp] = sci_name[int(sp)]
        except Exception:
            # Best-effort: prefixes that cannot be translated get a blank.
            names[sp] = ''

        if use_good_names:
            try:
                good_name = "%s" % (conversion[leaf.name][0])
            except Exception:
                good_name = leaf.name
            leaf.good_name = good_name

    # Collapse clades that contain only the target species.
    node2content = t.get_cached_content()
    target_species = {target_taxid}

    def is_sp_specific(_node):
        _species = set(_leaf.species for _leaf in node2content[_node])
        return not (_species - target_species)

    if collapse == 'yes':
        for n in t.get_leaves(is_leaf_fn=is_sp_specific):
            if n.children:
                for ch in n.get_children():
                    ch.detach()
                # Stored as str, like the per-leaf taxid assigned above, so
                # the str(target_taxid) comparisons below recognise it.
                n.taxid = str(target_taxid)
                n.name = "{%s}" % ('|'.join(
                    [_lf.name for _lf in node2content[n]]))
                if use_good_names:
                    n.good_name = "{%s}" % ('|'.join(
                        [_lf.good_name for _lf in node2content[n]]))

    def _label(leaf):
        # Name used in the output for a leaf.
        return leaf.good_name if use_good_names else leaf.name

    all_ortholgs_pairs = []
    event_lines = []

    for ev in t.get_descendant_evol_events():
        if ev.etype != "S":
            continue

        # Iterating a tree node yields its leaves.
        source_seqs = ev.node.children[0]
        ortho_seqs = ev.node.children[1]

        if target_taxid:
            # The side holding the target taxid is the source; skip events
            # that do not involve the target at all.
            target = str(target_taxid)
            if not any(leaf.taxid == target for leaf in source_seqs):
                if not any(leaf.taxid == target for leaf in ortho_seqs):
                    continue
                source_seqs, ortho_seqs = ortho_seqs, source_seqs

        source_names = [_label(leaf) for leaf in source_seqs]
        co_orthologs = sorted(source_names)

        # Ortholog names grouped by species prefix of the leaf name.
        orthologs = defaultdict(set)
        for leaf in ortho_seqs:
            sp = str(leaf.name.split('.')[0])
            orthologs[sp].add(_label(leaf))

        _otype = "one-to-" if len(source_names) == 1 else "many-to-"

        for sp, orth in orthologs.items():
            otype = _otype + ("one" if len(orth) == 1 else "many")
            event_lines.append('\t'.join([
                ','.join(co_orthologs), otype,
                str(sp), ','.join(sorted(orth)), treefile,
                # Same blank fallback as the name lookup above.
                names.get(sp, ''), '\n'
            ]))

        if args.pairs_table:
            ortho_names = [_label(leaf) for leaf in ortho_seqs]
            all_ortholgs_pairs.extend(
                itertools.product(source_names, ortho_names))

    if args.pairs_table:
        return (event_lines, all_ortholgs_pairs)
    return event_lines