def __write_algn(self, fullpath):
    """
    Write the alignment held by this object to *fullpath* in 'paml' format.

    Every item yielded by iterating over ``self`` is registered in a fresh
    ``SeqGroup`` under its ``node_id``, together with its ``nt_sequence``
    and ``name``; the group is then written out.
    """
    alignment = SeqGroup()
    for node in self:
        node_id = node.node_id
        alignment.id2seq[node_id] = node.nt_sequence
        label = node.name
        # Keep both directions of the id <-> name mapping in sync.
        alignment.id2name[node_id] = label
        alignment.name2id[label] = node_id
    alignment.write(outfile=fullpath, format='paml')
def __write_algn(self, fullpath):
    """
    Write the alignment held by this object to *fullpath* in 'paml' format.

    Every item yielded by iterating over ``self`` is registered in a fresh
    ``SeqGroup`` under its ``node_id``, together with its ``nt_sequence``
    and ``name``; the group is then written out.
    """
    alignment = SeqGroup()
    for node in self:
        node_id = node.node_id
        alignment.id2seq[node_id] = node.nt_sequence
        label = node.name
        # Keep both directions of the id <-> name mapping in sync.
        alignment.id2name[node_id] = label
        alignment.name2id[label] = node_id
    alignment.write(outfile=fullpath, format='paml')
def extract_ss(input_path, suffix, tree_file):
    """Create a new dataset whose alignment is restricted to a tree's leaves.

    The alignment at ``input_path.alignment`` is read as fasta, and only the
    entries whose label is a leaf name of the tree in ``tree_file`` are kept.
    The filtered alignment is written to the new dataset's alignment path,
    and the duplicates JSON and outgroups files are copied over unchanged.

    Args:
        input_path: Paths-like object for the source dataset. The attributes
            read are ``alignment``, ``duplicates_json``, ``outgroups_file``,
            ``_version`` and ``_dataset``.
        suffix: String appended to ``input_path._dataset`` to name the new
            dataset.
        tree_file: Tree source handed to ``Tree(tree_file, format=1)``.

    Returns:
        None. The results are the files written under the new dataset.
    """
    tree = Tree(tree_file, format=1)
    leaves_set = set(tree.get_leaf_names())
    msa = SeqGroup(input_path.alignment, "fasta")

    # Build the destination dataset from the same version and a suffixed name.
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)

    new_msa = SeqGroup()
    for entry in msa.iter_entries():
        # Only the first two fields are used; any further fields are ignored.
        label, sequence = entry[0], entry[1]
        if label in leaves_set:
            new_msa.set_seq(label, sequence)

    # A context manager guarantees the alignment is flushed and the handle
    # closed before the function returns (the bare open().write() leaked it).
    with open(output_path.alignment, "w") as alignment_file:
        alignment_file.write(new_msa.write(format="fasta"))

    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
def extract_ss(input_path, suffix, tree_file):
    """Create a new dataset whose alignment is restricted to a tree's leaves.

    The alignment at ``input_path.alignment`` is read as fasta, and only the
    entries whose label is a leaf name of the tree in ``tree_file`` are kept.
    The filtered alignment is written to the new dataset's alignment path,
    and the duplicates JSON and outgroups files are copied over unchanged.
    Progress messages are printed at the start and at the end.

    Args:
        input_path: Paths-like object for the source dataset. The attributes
            read are ``alignment``, ``duplicates_json``, ``outgroups_file``,
            ``_version`` and ``_dataset``.
        suffix: String appended to ``input_path._dataset`` to name the new
            dataset.
        tree_file: Tree source handed to ``Tree(tree_file, format=1)``.

    Returns:
        None. The results are the files written under the new dataset.
    """
    print(
        "Extracting alignment generated with the support selection tree thinning technique..."
    )
    tree = Tree(tree_file, format=1)
    leaves_set = set(tree.get_leaf_names())
    msa = SeqGroup(input_path.alignment, "fasta")

    # Build the destination dataset from the same version and a suffixed name.
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)

    new_msa = SeqGroup()
    for entry in msa.iter_entries():
        # Only the first two fields are used; any further fields are ignored.
        label, sequence = entry[0], entry[1]
        if label in leaves_set:
            new_msa.set_seq(label, sequence)

    # A context manager guarantees the alignment is flushed and the handle
    # closed before the function returns (the bare open().write() leaked it).
    with open(output_path.alignment, "w") as alignment_file:
        alignment_file.write(new_msa.write(format="fasta"))

    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
    print("New version of the snapshot: " + output_path.path)
import random
import string
import sys
from tempfile import NamedTemporaryFile

from ete3 import SeqGroup

# Rename every sequence of an alignment to a random 5-character code, keep a
# table mapping the original names to the codes, and export the renamed
# alignment in phylip format.
#
# Command-line arguments:
#   1: input alignment, loaded with SeqGroup
#   2: path of the intermediate fasta file written with the coded names
#   3: path of the phylip output
#   4: path of the tab-separated "<original name>\t<code>" table
in_file = sys.argv[1]
transform_fasta = sys.argv[2]
out_file = sys.argv[3]

alg = SeqGroup(in_file)

code_alphabet = string.ascii_letters + string.digits
# Codes already handed out; two sequences must never share a name, otherwise
# the translation table could not be inverted.
used_codes = set()

# Context managers close both outputs even if an entry fails midway.
with open(sys.argv[4], 'w') as translate_table, \
        open(transform_fasta, 'w') as translate:
    for name, seq, _ in alg:
        code = ''.join(random.choices(code_alphabet, k=5))
        while code in used_codes:
            # Rare collision: draw again until the code is unused.
            code = ''.join(random.choices(code_alphabet, k=5))
        used_codes.add(code)

        # print(..., file=...) replaces the Python 2 "print >> handle" form,
        # which cannot coexist with random.choices (Python 3.6+).
        print('>%s\n%s' % (code, seq), file=translate)
        print('%s\t%s' % (name, code), file=translate_table)

# Reload the renamed fasta (now fully written and closed) and convert it.
translate_alg = SeqGroup(transform_fasta)
translate_alg.write(format="phylip", outfile=out_file)
# Thread each cDNA onto its aligned protein sequence: every residue consumes
# the next three nucleotides, every gap becomes a three-dash codon gap.
for name, seq, _ in alg_aa:
    try:
        cdna = F.id2seq[F.name2id[name]]
    except KeyError:
        print("cdna for %s not found" % name)
        continue

    # Collect codons in a list and join once; an offset into the cDNA avoids
    # re-slicing (and copying) the remaining sequence for every residue.
    codons = []
    offset = 0
    for pos in seq:
        if pos != "-":
            codons.append(cdna[offset:offset + 3])
            offset += 3
        else:
            codons.append("---")
    # One more codon after the last residue (the stop codon, per the
    # original author's note); the slice is empty if the cDNA has run out.
    codons.append(cdna[offset:offset + 3])
    alg_dna.set_seq(name, "".join(codons))

# Single-argument print(...) behaves the same under Python 2 and Python 3.
print("Input protein alignment contains %s aa sequences" % len(alg_aa))
print("Output cdna alignment contains %s cdna sequences" % len(alg_dna))
# NOTE(review): the return value of write() is printed, as in the original;
# confirm whether it carries anything useful when outfile is given.
print(alg_dna.write(outfile=infile.replace(".clustalo", ".clustalo.cdna.aln")))
# cDNA sequences, looked up below through F.name2id / F.id2seq.
F = parser.fasta.read_fasta(sys.argv[2])

# For each protein alignment file, build the matching cDNA alignment and
# write it next to the input with a ".clustalo.cdna.aln" name.
for infile in infiles:
    print(infile)
    # Empty alignment files are skipped.
    if os.stat(infile).st_size == 0:
        continue

    alg_aa = SeqGroup(infile)
    alg_dna = SeqGroup()
    for name, seq, _ in alg_aa:
        try:
            cdna = F.id2seq[F.name2id[name]]
        except KeyError:
            print("cdna for %s not found" % name)
            continue

        # Collect codons in a list and join once; an offset into the cDNA
        # avoids re-slicing (and copying) the remainder for every residue.
        codons = []
        offset = 0
        for pos in seq:
            if pos != "-":
                codons.append(cdna[offset:offset + 3])
                offset += 3
            else:
                # A gap in the protein alignment spans a whole codon.
                codons.append("---")
        # One more codon after the last residue (the stop codon, per the
        # original author's note); the slice is empty if the cDNA ran out.
        codons.append(cdna[offset:offset + 3])
        alg_dna.set_seq(name, "".join(codons))

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("Input protein alignment contains %s aa sequences" % len(alg_aa))
    print("Output cdna alignment contains %s cdna sequences" % len(alg_dna))
    # NOTE(review): the return value of write() is printed, as in the
    # original; confirm whether it is meaningful when outfile is given.
    print(alg_dna.write(outfile=infile.replace(".clustalo", ".clustalo.cdna.aln")))