def pick_otu(spe_out, alignment):
    """Write one FASTA record per "Species" entry found in *spe_out*.

    For every line of *spe_out* that starts with ``"Species"``, the line that
    follows it (stripped of surrounding whitespace) is used as a sequence
    name.  That name is looked up in the alignment and written, FASTA style,
    to ``alignment + ".otu"``.

    Args:
        spe_out: path of the text file containing the "Species" lines.
        alignment: path of the alignment, passed to ``SeqGroup(sequences=...)``;
            also the prefix of the output file name.

    A "Species" line that is the very last line of the file has no following
    name line and is skipped.
    """
    with open(spe_out) as fin:
        lines = fin.readlines()

    # Load the alignment before touching the output file so that a failure
    # here does not leave an empty/truncated ".otu" file behind.
    aln = SeqGroup(sequences=alignment)

    with open(alignment + ".otu", "w") as fout:
        # Pair every line with its successor; a trailing header without a
        # successor simply never appears as the first element of a pair.
        for header, following in zip(lines, lines[1:]):
            if not header.startswith("Species"):
                continue
            name = following.strip()
            # Look the sequence up first so a failed lookup never leaves a
            # header without a sequence in the output.
            seq = aln.get_seq(name)
            fout.write(">" + name + "\n")
            fout.write(seq + "\n")
def _create_tree(tree, fasta, out, color):
    """Render *tree* to *out* with each leaf's sequence drawn next to it.

    Args:
        tree: value passed to ``Tree(...)``.
        fasta: alignment passed to ``SeqGroup(..., format="fasta")``.
        out: target passed to ``Tree.render``.
        color: value passed to ``_parse_color_file``; the result is used as a
            mapping from leaf name to background colour.
    """
    alignment = SeqGroup(fasta, format="fasta")
    t = Tree(tree)
    colors = _parse_color_file(color)

    for leaf_name in t.get_leaf_names():
        # One motif face per name; it is shared by every leaf carrying it.
        motif_face = SeqMotifFace(alignment.get_seq(leaf_name), seq_format="()")
        for leaf in t.get_leaves_by_name(leaf_name):
            if leaf_name in colors:
                style = NodeStyle()
                style['bgcolor'] = colors[leaf_name]
                leaf.set_style(style)
            leaf.add_face(motif_face, 0, 'aligned')

    t.render(out)
def link_to_alignment(self, alignment, alg_format="fasta", **kwargs):
    """Attach alignment sequences to the nodes of this tree.

    Every node yielded by ``self.traverse()`` whose ``name`` is found in the
    alignment gets a ``sequence`` feature holding that sequence.  Leaves
    whose name is missing are counted and reported in a single warning on
    stderr; internal nodes that are missing are not reported.

    :param alignment: a ``SeqGroup`` instance (used as is), or any value
        accepted as first argument by the ``SeqGroup`` constructor.
    :param alg_format: passed as ``format`` to ``SeqGroup`` when
        *alignment* is not already a ``SeqGroup``.
    :param kwargs: extra keyword arguments forwarded to ``SeqGroup``.
    """
    # isinstance (not an exact type comparison) so that SeqGroup subclasses
    # are used directly instead of being handed back to the constructor.
    if isinstance(alignment, SeqGroup):
        alg = alignment
    else:
        alg = SeqGroup(alignment, format=alg_format, **kwargs)

    missing_leaves = []
    for n in self.traverse():
        try:
            n.add_feature("sequence", alg.get_seq(n.name))
        except KeyError:
            if n.is_leaf():
                missing_leaves.append(n.name)

    if missing_leaves:
        print("Warnning: [%d] terminal nodes could not be found in the alignment." %
              len(missing_leaves), file=sys.stderr)
def link_to_alignment(self, alignment, alg_format="fasta", **kwargs):
    """Link the sequences of an alignment to the nodes of this tree.

    Each node visited by ``self.traverse()`` is looked up in the alignment by
    its ``name``; on success the sequence is stored on the node as the
    ``sequence`` feature.  A ``KeyError`` during that step marks the node as
    missing: missing leaves are counted and a single warning with their
    number is printed to stderr, missing internal nodes are ignored.

    :param alignment: either a ``SeqGroup`` (or subclass) instance, which is
        used directly, or a value to pass to the ``SeqGroup`` constructor.
    :param alg_format: ``format`` argument for the ``SeqGroup`` constructor;
        only used when *alignment* is not a ``SeqGroup``.
    :param kwargs: additional keyword arguments for the ``SeqGroup``
        constructor.
    """
    # An exact ``type(...) ==`` check would send SeqGroup subclasses through
    # the constructor again; isinstance accepts them as they are.
    if isinstance(alignment, SeqGroup):
        alg = alignment
    else:
        alg = SeqGroup(alignment, format=alg_format, **kwargs)

    missing_leaves = []
    for n in self.traverse():
        try:
            n.add_feature("sequence", alg.get_seq(n.name))
        except KeyError:
            if n.is_leaf():
                missing_leaves.append(n.name)

    if missing_leaves:
        print("Warnning: [%d] terminal nodes could not be found in the alignment." %
              len(missing_leaves), file=sys.stderr)
# Draw a tree with its alignment next to the leaves and render it to PNG and
# SVG, then load a second copy of the same tree and make it ultrametric.
# `tree_input` and `alg` are defined outside this fragment.
t = PhyloTree( tree_input , format=1, quoted_node_names=True )
seqs = SeqGroup(alg, format="fasta")

# One style object shared by every node: node size 0, line widths of 2.
nodestyle1 = NodeStyle()
nodestyle1["size"] = 0
nodestyle1["vt_line_width"] = 2
nodestyle1["hz_line_width"] = 2
for node in t.traverse():
    node.set_style(nodestyle1)

for leaf in t.iter_leaves():
    # Sequence of this leaf, looked up in the alignment by leaf name.
    item=seqs.get_seq(leaf.name)
    # NOTE(review): name_face is built from the sequence string itself and is
    # not added to any node in the visible code - confirm whether it is needed.
    name_face = AttrFace(item, fsize=24)
    # Sequence face with a per-residue background colour table.
    Bars = SequenceFace(item, seqtype='aa', fsize=24, bg_colors={'G': 'Khaki', 'A': 'Khaki', 'S': 'Khaki', 'T': 'Khaki', 'C': 'LightGreen', 'V': 'LightGreen', 'I': 'LightGreen', 'L': 'LightGreen', 'P': 'LightGreen', 'F': 'LightGreen', 'Y': 'LightGreen', 'M': 'YellowGreen', 'W': 'LightGreen', 'N': 'Thistle', 'Q': 'Thistle', 'H': 'Thistle', 'D': 'DarkSalmon', 'E': 'DarkSalmon', 'K': 'SkyBlue', 'R': 'SkyBlue', 'X':'Black', '-':'White' }, fg_colors=None, codon=None, col_w=1.5, alt_col_w=3, special_col=None, interactive=False)
    # Column 2 of the "aligned" face area.
    leaf.add_face(Bars, 2, "aligned")

# Same drawing written twice: once as PNG, once as SVG (h=100, units="mm").
t.render("tree_and_alignment.png", h=100, units="mm")
t.render("tree_and_alignment.svg", h=100, units="mm")

# Second, independent copy of the tree loaded from the same input; it gets
# the same node style and is then converted to an ultrametric tree.
t2 = PhyloTree( tree_input , format=1, quoted_node_names=True )
for node in t2.traverse():
    node.set_style(nodestyle1)
t2.convert_to_ultrametric(tree_length=None, strategy='balanced')
cf = CircleFace(10, "black") elif ies == '0': cf = CircleFace(10, "LightGrey") else: sys.exit(1) column = hiesL[(geneFamily, homIES)] + 1 leaf.add_face(cf, column, "aligned") drawTree(outputFile) elif plotStyle == '3': # plot with MSA # load nucleotide sequences for all genes! nuclAlnFile = os.path.join(basePath, 'analysis', 'msas', 'filtered', 'cluster.' + geneFamily + '.nucl.fa') seqs = SeqGroup(sequences = nuclAlnFile, format = "fasta") for leaf in t: geneId = leaf.name seq = seqs.get_seq(geneId) seq = seq.translate(None, string.ascii_lowercase) # keep only CDS iesmotif = [[1, len(seq), "line", 2, 5, None, None, None]] for homIES in gfhomIES[geneFamily]: (begin, end, ies, iesId, beginMSA, endMSA) = charMat[(geneFamily, homIES, geneId)] if ies == '?': if beginMSA == 'NA': iesmotif.append([int(begin), int(end),"()", 10, 10, "red", "black", "arial|8|black|?"]) else: iesmotif.append([int(begin), int(end),"()", 10, 10, "red", "black", "arial|8|black|?"]) elif ies == '1': iesmotif.append([int(beginMSA), int(endMSA),"[]", 10, 10, "black", "red", "arial|8|black|" + iesId]) elif ies == '0': iesmotif.append([int(begin), int(end), "[]", 10, 10, "silver", "silver", None]) else: quit(1)
# Print the sequences of a FASTA alignment in the leaf order of a newick
# tree.  Usage: script.py <alignment.fasta> <tree.nw>
import re
import sys

from ete3 import SeqGroup, Tree

alg_file = sys.argv[1]  # in fasta format
tree_file = sys.argv[2]  # in newick format

alg = SeqGroup(alg_file)
# Iterate over a snapshot: the loop inserts new keys into name2id, and
# growing a dict while iterating its live items() view raises RuntimeError
# on Python 3.
for k, v in list(alg.name2id.items()):
    # Converts illegal newick chars from alg names so they can match the
    # leaf names.  Comment this line if not necessary.
    k = re.sub('[:,();]', '_', k)
    alg.name2id[k] = v

tree = Tree(tree_file)
for leaf in tree:
    print(">%s\n%s" % (leaf.name, alg.get_seq(leaf.name)))
# Split one FASTA file into one FASTA file per species.  The species of a
# sequence is the part of its name before the first '.'.
from ete3 import SeqGroup

sp_mem = {}  # species prefix -> list of sequence names, in input order
in_fasta = SeqGroup('/home/plaza/research/dom_walk/raw/COG0484.faa')
for name, seq, _ in in_fasta:
    sp = name.split('.')[0]
    sp_mem.setdefault(sp, []).append(name)

print('writing fastas per sp')
for k, val in sp_mem.items():
    # The context manager closes each output file even if a lookup fails.
    with open('/home/plaza/research/dom_walk/analysis/fasta_per_sp/' + k + '.faa', 'w') as out_fasta:
        for seq_name in val:
            print(">%s" % (seq_name), file=out_fasta)
            print(in_fasta.get_seq(seq_name), file=out_fasta)
# Write a FASTA file containing every sequence EXCEPT those whose leaves lie
# between two target leaves (inclusive) in the leaf order of the
# midpoint-rooted tree.
# Usage: script.py <tree.nw> <in.fasta> <out.fasta> <start_leaf> <end_leaf>
from ete3 import SeqGroup, Tree
import sys

tree_file = sys.argv[1]  # in newick format
original_fasta = SeqGroup(sys.argv[2])
pruned_fasta_path = sys.argv[3]
star_target = str(sys.argv[4])
end_target = str(sys.argv[5])

tree = Tree(tree_file)
R = tree.get_midpoint_outgroup()
tree.set_outgroup(R)

name_list = []
star_pos = None
end_pos = None
for num, leaf in enumerate(tree):
    name_list.append(leaf.name)
    # If a name occurs more than once, the last occurrence wins.
    if star_target == leaf.name:
        star_pos = num
    if end_target == leaf.name:
        end_pos = num

# Fail with a clear message instead of a NameError further down.
if star_pos is None or end_pos is None:
    sys.exit("Target leaf not found in tree: start=%r end=%r"
             % (star_target, end_target))

pruned_list = name_list[star_pos:(end_pos + 1)]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(pruned_list)

# Set for constant-time membership tests inside the loop.
pruned_names = set(pruned_list)
# The output is opened only once everything above has succeeded, and is
# closed (and flushed) when the block ends.
with open(pruned_fasta_path, 'w') as pruned_fasta:
    for ele in name_list:
        if ele not in pruned_names:
            pruned_fasta.write(">%s\n%s\n" % (ele, original_fasta.get_seq(ele)))