Example #1
0
def store_low_CAA_genes(genes):
    """ Store all genes whose CAA codon usage is below 0.9 in a CSV file.

    For every gene whose ``'CAA'`` codon-usage entry is not ``None`` and is
    smaller than 0.9, the occurrence of the LYS (AAA/AAG), GLU (GAA/GAG)
    and GLN (CAA/CAG) codons in that gene is normalized by the sequence
    length and divided by the corresponding summed value returned by
    ``DNAAnalyzer.get_codon_freqs`` for the whole gene set.  The result is
    written to ``results/low_CAA_genes.csv`` together with the gene id, the
    gene name and the codon usage of AAA, GAA and CAA.

    The summed LYS/GLU/GLN frequencies of the whole gene set are printed to
    stdout as a side effect.

    Args:
        genes: collection of gene records handed to ``DNAAnalyzer``; each
            record must provide ``.seq`` and ``.id``.

    Returns:
        None.  The ``results`` directory has to exist already, otherwise
        opening the output file fails.
    """
    # compute codon usage
    print('Computing codon statistics')
    dnana = DNAAnalyzer(strict=False)
    data = dnana.get_gene_codon_usages(genes)

    # Summed codon frequencies over the whole gene set, computed once and
    # reused for the printout and for every gene below.
    avg_codon_freqs = dnana.get_codon_freqs(genes)
    lys_avg = avg_codon_freqs['AAA'] + avg_codon_freqs['AAG']
    glu_avg = avg_codon_freqs['GAA'] + avg_codon_freqs['GAG']
    gln_avg = avg_codon_freqs['CAA'] + avg_codon_freqs['CAG']
    print('  LYS freq: %f\n  GLU freq: %f\n  GLN freq: %f' %
          (lys_avg, glu_avg, gln_avg))

    def relative_freq(codon_counts, seq_len, codons, background):
        """ Occurrence of `codons` per sequence position over `background`
        """
        # The per-mille scaling followed by the division by 1000 is kept
        # from the original computation so results stay bit-identical.
        # NOTE(review): the count is divided by the sequence length (in
        # nucleotides, presumably), not by the number of codons -- confirm
        # that this matches the unit of `get_codon_freqs`.
        norm = sum(codon_counts[codon] for codon in codons) * 1000 / seq_len
        return (norm / 1000) / background

    # filter for genes
    low_CAA_genes = []
    for gene, codu in data.items():
        if codu['CAA'] is not None and codu['CAA'] < 0.9:
            # Count the codons once per gene instead of once per amino acid
            # (assumes `_count_codons` is a pure function of the sequence).
            codon_counts = dnana._count_codons(str(gene.seq))
            seq_len = len(gene.seq)

            lys_freq = relative_freq(
                codon_counts, seq_len, ('AAA', 'AAG'), lys_avg)
            glu_freq = relative_freq(
                codon_counts, seq_len, ('GAA', 'GAG'), glu_avg)
            gln_freq = relative_freq(
                codon_counts, seq_len, ('CAA', 'CAG'), gln_avg)

            low_CAA_genes.append(
                (gene.id, extract_gene_name(gene), lys_freq, codu['AAA'],
                 glu_freq, codu['GAA'], gln_freq, codu['CAA']))

    # store results
    # newline='' lets the csv module control line endings itself; without
    # it, platforms that translate '\n' end up with blank lines between rows.
    with open('results/low_CAA_genes.csv', 'w', newline='') as fd:
        wrtr = csv.writer(fd)
        wrtr.writerow([
            'ID', 'name', 'LYS rel freq', 'CU: AAA', 'GLU rel freq', 'CU: GAA',
            'GLN rel freq', 'CU: CAA'
        ])
        wrtr.writerows(low_CAA_genes)
Example #2
0
class TestCodonUsage(TestCase):
    """ Tests for the codon counting, usage and frequency methods of
    DNAAnalyzer.
    """

    def setUp(self):
        """ Provide an analyzer, one raw sequence and a small gene set
        """
        self.dnana = DNAAnalyzer()
        self.seq = 'AAAAAGAAA'

        raw_sequences = ('AAAAAAAAG', 'AAAAAGAAA', 'TTTTTCTTT', 'TTCTTTTTC')
        self.genes = [SeqRecord(Seq(raw)) for raw in raw_sequences]

    def _assert_table(self, table, expectations):
        """ Check `table[codon]` against each (codon, expected) pair in order.

        An expected value of None is compared directly; every other value
        is compared against the table entry rounded to three decimals.
        """
        for codon, expected in expectations:
            if expected is None:
                self.assertEqual(table[codon], None)
            else:
                self.assertEqual(round(table[codon], 3), expected)

    def test_codon_counter(self):
        """ Raw codon counts of a single sequence
        """
        counts = self.dnana._count_codons(self.seq)

        for codon, expected in (('AAA', 2), ('AAG', 1),
                                ('AAT', 0), ('AAC', 0)):
            self.assertEqual(counts[codon], expected)

    def test_codon_usage(self):
        """ Codon usage of a single sequence; unseen codons map to None
        """
        usage = self.dnana.get_codon_usage(self.seq)

        self._assert_table(usage, (
            ('AAA', 0.667),
            ('AAG', 0.333),
            ('AAT', None),
            ('AAC', None),
        ))

    def test_average_codon_usage(self):
        """ Codon usage averaged over the gene set
        """
        mean_usage = self.dnana.get_avg_codon_usage(self.genes)

        self._assert_table(mean_usage, (
            ('AAA', 0.667),
            ('AAG', 0.333),
            ('AAT', None),
            ('AAC', None),
            ('TTT', 0.5),
            ('TTC', 0.5),
        ))

    def test_codon_frequencies(self):
        """ Codon frequencies over the gene set
        """
        freqs = self.dnana.get_codon_freqs(self.genes)

        self._assert_table(freqs, (
            ('AAA', 0.333),
            ('AAG', 0.167),
            ('TTT', 0.25),
            ('TTC', 0.25),
        ))