def switch_to_codon(alg_fasta_file, kept_columns=None):
    """Convert an amino-acid alignment into the matching codon alignment.

    Every entry of the alignment loaded from ``alg_fasta_file`` is walked
    column by column. A gap column becomes ``"---"``; any other column
    consumes the next three characters of the entry's nucleotide sequence,
    which is fetched with ``db.get_seq(seqname, "nt")`` and upper-cased.

    Original author's note: the motivation is to fall back to a DNA-based
    tree when the protein columns are too conserved (mixed models being
    another option). No conservation check is performed here.

    Args:
        alg_fasta_file: Passed as-is to ``SeqGroup`` to load the
            amino-acid alignment.
        kept_columns: Optional iterable of 0-based amino-acid column
            positions (values are converted with ``int``). Only these
            columns are emitted. When ``None`` or empty, every column is
            emitted.

    Returns:
        A new ``SeqGroup`` holding one codon-aligned sequence per entry of
        the input alignment, stored under the same sequence name.
    """
    # A falsy value (None, empty container) means "no filtering".
    kept_columns = set(map(int, kept_columns)) if kept_columns else []

    protein_alg = SeqGroup(alg_fasta_file)
    codon_alg = SeqGroup()

    for seqname, aaseq, _comments in protein_alg.iter_entries():
        ntseq = db.get_seq(seqname, "nt").upper()
        codons = []
        offset = 0  # read position inside the ungapped nucleotide sequence

        for column, residue in enumerate(aaseq):
            if residue in GAP_CHARS:
                codon = "---"
            else:
                # The offset advances for every non-gap residue, even when
                # the column is filtered out below, so later codons stay
                # in frame.
                codon = ntseq[offset:offset + 3]
                offset += 3

            if kept_columns and column not in kept_columns:
                continue

            # The DB sequence is trusted as-is; its consistency with the
            # protein sequence should have been checked at start-up.
            codons.append(codon)

        codon_alg.set_seq(seqname, "".join(codons))

    return codon_alg