class LysineAbundanceFilter(BaseFilter):
    """Keep only genes that are rich in lysine codons.

    A record passes when its number of lysine codons (``AAA`` + ``AAG``),
    scaled to a sequence length of 1000 bases, is strictly greater than
    76.6. Note that the scaling uses the sequence length in bases
    (``len(record.seq)``), not the number of codons.
    """

    # Codons that are counted as lysine.
    LYSINE_CODONS = ('AAA', 'AAG')
    # Sequence length (in bases) the lysine count is normalised to.
    SCALE_LENGTH = 1000
    # A record must exceed this many lysine codons per SCALE_LENGTH bases.
    MIN_LYSINES = 76.6

    def __init__(self):
        # NOTE(review): strict=False presumably relaxes DNAAnalyzer's
        # sequence validation -- confirm against DNAAnalyzer.
        self.dnaa = DNAAnalyzer(strict=False)

    def apply(self, record):
        """Return True if *record* exceeds the lysine abundance threshold.

        Args:
            record: object exposing a ``seq`` attribute that supports
                ``len()`` and ``str()`` (presumably a Biopython
                ``SeqRecord`` -- verify against callers).

        Returns:
            bool: True when the normalised lysine codon count is strictly
            greater than ``MIN_LYSINES``; False otherwise, including for
            an empty sequence.
        """
        seq_len = len(record.seq)
        if seq_len == 0:
            # An empty sequence has no lysine codons; returning early also
            # avoids the division by zero in the normalisation below.
            return False

        codon_counts = self.dnaa._count_codons(str(record.seq))
        lysines = sum(codon_counts[codon] for codon in self.LYSINE_CODONS)
        normalised = lysines * self.SCALE_LENGTH / seq_len
        return normalised > self.MIN_LYSINES
class TestCodonUsage(TestCase):
    """Tests for DNAAnalyzer's codon counting, usage and frequency methods."""

    def setUp(self):
        # Fresh analyzer plus one plain sequence and four small gene records
        # (two made of lysine codons, two made of phenylalanine codons).
        self.dnana = DNAAnalyzer()
        self.seq = 'AAAAAGAAA'
        gene_sequences = ('AAAAAAAAG', 'AAAAAGAAA', 'TTTTTCTTT', 'TTCTTTTTC')
        self.genes = [SeqRecord(Seq(bases)) for bases in gene_sequences]

    def test_codon_counter(self):
        # Absent codons are expected to be reported with a count of 0.
        counts = self.dnana._count_codons(self.seq)
        expected_counts = (('AAA', 2), ('AAG', 1), ('AAT', 0), ('AAC', 0))
        for codon, expected in expected_counts:
            self.assertEqual(counts[codon], expected)

    def test_codon_usage(self):
        # Usage values are compared after rounding to three decimals;
        # codons that do not occur are expected to map to None.
        usage = self.dnana.get_codon_usage(self.seq)
        for codon, expected in (('AAA', 0.6666), ('AAG', 0.3333)):
            self.assertEqual(round(usage[codon], 3), round(expected, 3))
        for codon in ('AAT', 'AAC'):
            self.assertEqual(usage[codon], None)

    def test_average_codon_usage(self):
        # Same checks as test_codon_usage, but averaged over all genes.
        avg_usage = self.dnana.get_avg_codon_usage(self.genes)
        for codon, expected in (('AAA', 0.6666), ('AAG', 0.3333)):
            self.assertEqual(round(avg_usage[codon], 3), round(expected, 3))
        for codon in ('AAT', 'AAC'):
            self.assertEqual(avg_usage[codon], None)
        for codon in ('TTT', 'TTC'):
            self.assertEqual(round(avg_usage[codon], 3), round(0.5, 3))

    def test_codon_frequencies(self):
        # Frequencies are compared after rounding to three decimals.
        freqs = self.dnana.get_codon_freqs(self.genes)
        expected_freqs = (
            ('AAA', 0.333),
            ('AAG', 0.167),
            ('TTT', 0.25),
            ('TTC', 0.25),
        )
        for codon, expected in expected_freqs:
            self.assertEqual(round(freqs[codon], 3), expected)
def find_special_AAA_freqs(genes, id_filter=None):
    """Print gene length and AAA/AAG codon counts for selected genes.

    For every ID in *id_filter* that matches the ``id`` of a record in
    *genes*, the record's ID, sequence length and its ``AAA`` and ``AAG``
    codon counts are printed to stdout. IDs without a matching record are
    skipped silently.

    Args:
        genes: iterable of records exposing ``id`` and ``seq`` attributes
            (presumably Biopython ``SeqRecord`` objects -- verify against
            callers). It is traversed exactly once, so a one-shot iterator
            is fine.
        id_filter: optional iterable of gene IDs to report, in output
            order. Defaults to the built-in "long AAA=1" ID list.

    Returns:
        None. All results are printed.
    """
    if id_filter is None:
        # Alternative ID sets that were used with this function before:
        # AAA=0:
        #   'DDB0305421|DDB_G0276433', 'DDB0347990|DDB_G0289359',
        #   'DDB0347948|DDB_G0270662', 'DDB0349097|DDB_G0279651',
        #   'DDB0306784|DDB_G0293038', 'DDB0218505|DDB_G0283527',
        #   'DDB0348150|DDB_G0285779', 'DDB0347690|DDB_G0286087'
        # AAA=1:
        #   'DDB0230164|DDB_G0293360', 'DDB0186263|DDB_G0284929',
        #   'DDB0232396|DDB_G0282423', 'DDB0238636|DDB_G0269008',
        #   'DDB0234236|DDB_G0289721', 'DDB0229439|DDB_G0270122'
        id_filter = [
            'DDB0348668|DDB_G0276223',
            'DDB0307442|DDB_G0269954',
            'DDB0307413|DDB_G0269350',
            'DDB0308362|DDB_G0269090',
            'DDB0216219|DDB_G0269132',
        ]  # long AAA=1

    # Materialise so that a one-shot iterable can be used both for the
    # membership set and for the ordered output loop.
    gene_ids = list(id_filter)
    wanted = set(gene_ids)

    # Index the requested records in a single pass over `genes`. Only the
    # first record per ID is kept, matching a first-match linear search.
    records_by_id = {}
    for gene in genes:
        if gene.id in wanted and gene.id not in records_by_id:
            records_by_id[gene.id] = gene

    dnaa = DNAAnalyzer()
    for gid in gene_ids:
        rec = records_by_id.get(gid)
        if rec is None:
            continue

        print(rec.id)
        print(' ', 'gene length:', len(rec.seq))

        coco = dnaa._count_codons(str(rec.seq))
        print(' ', 'AAA:', coco['AAA'])
        print(' ', 'AAG:', coco['AAG'])
        print()