def test_codons(self):
    """PositionalBaseUsage codons should give expected codon freqs"""
    #one of each base at every position should give freqs of 1/64 for
    #each of the 64 codons
    uniform = CodonUsage('UUUCCCAAAGGG').positionalBases().codons()
    self.assertEqual(len(uniform), 64)
    for codon in uniform:
        self.assertFloatEqual(uniform[codon], 1.0/64)

    #two bases at each position: 'UCGAGUUCGUCG' has U,A at the first
    #position, C,G at the second and G,U at the third, each split 3:1.
    #The expected codon freq is the product of the per-position freqs.
    per_position = (
        (('U', 0.75), ('A', 0.25)),
        (('C', 0.75), ('G', 0.25)),
        (('G', 0.75), ('U', 0.25)),
    )
    expected = {}
    for b1, f1 in per_position[0]:
        for b2, f2 in per_position[1]:
            for b3, f3 in per_position[2]:
                expected[b1 + b2 + b3] = f1 * f2 * f3

    observed = CodonUsage('UCGAGUUCGUCG').positionalBases().codons()
    for codon in observed:
        if codon not in expected:
            #codons that need a base absent at some position must be 0
            self.assertEqual(observed[codon], 0)
        else:
            self.assertFloatEqual(observed[codon], expected[codon])
def test_aminoAcids(self):
    """PositionalBaseUsage aminoAcids should return correct amino acids"""
    #check hand-calculated values on a particular sequence; hi/lo are the
    #freqs of the common/rare base at each codon position
    hi, lo = 0.75, 0.25
    expected = {
        'S': hi * hi * hi + hi * hi * lo + lo * lo * lo,
        'W': hi * lo * hi,
        'C': hi * lo * lo,
        'T': lo * hi * hi + lo * hi * lo,
        'R': lo * lo * hi,
    }
    observed = CodonUsage('UCGAGUUCGUCG').positionalBases().aminoAcids()
    for aa in observed:
        if aa not in expected:
            self.assertEqual(observed[aa], 0)
        else:
            self.assertFloatEqual(observed[aa], expected[aa])

    #test for unbiased freqs on a couple of different genetic codes:
    #each amino acid's freq should be its number of codons over 64
    unbiased = CodonUsage('UUUCCCAAAGGG')
    standard_code = GeneticCodes[1]
    standard_aas = unbiased.positionalBases().aminoAcids()
    for aa in standard_aas:
        self.assertEqual(standard_aas[aa], len(standard_code[aa])/64.0)

    mt_code = GeneticCodes[2]
    mt_aas = unbiased.positionalBases().aminoAcids(mt_code)
    #a different code must give a different amino acid distribution
    self.assertNotEqual(standard_aas, mt_aas)
    for aa in mt_aas:
        self.assertEqual(mt_aas[aa], len(mt_code[aa])/64.0)
def test_positionalBases(self):
    """CodonUsage bases should count bases at each position correctly"""
    freqs = {'UUC':5, 'AUA':10, 'AUG':10, 'CGC':3, 'AGG':2, 'XYZ':8,
        'UAA':2, 'UGA':1}
    u = CodonUsage(freqs)
    b = u.positionalBases()
    #use a TestCase assertion rather than a bare assert, which would be
    #silently skipped when running under python -O
    self.assertTrue(isinstance(b, PositionalBaseUsage))
    first, second, third = b
    #without purging, every codon (including 'XYZ') contributes its count
    self.assertEqual(first, BaseUsage({'U':8,'C':3,'A':22,'X':8}))
    self.assertEqual(second, BaseUsage({'U':25,'C':0,'A':2,'G':6,'Y':8}))
    self.assertEqual(third, BaseUsage({'C':8,'A':13,'G':12,'Z':8}))

    #check that it also works when we purge
    p = u.positionalBases(purge_unwanted=True)
    first, second, third = p
    self.assertEqual(first, BaseUsage({'U':5,'C':3,'A':2}))
    self.assertEqual(second, BaseUsage({'U':5,'G':5}))
    self.assertEqual(third, BaseUsage({'C':8,'G':2}))

    #check that it also works with a different genetic code, and,
    #incidentally, that the purging didn't affect the original object
    u.GeneticCode = GeneticCodes[2] #mt code: different stop codons
    p = u.positionalBases(purge_unwanted=True)
    first, second, third = p
    self.assertEqual(first, BaseUsage({'U':6,'C':3,'A':20}))
    self.assertEqual(second, BaseUsage({'U':25,'G':4}))
    self.assertEqual(third, BaseUsage({'C':8,'A':11,'G':10}))
def test_rscu(self):
    """CodonUsage rscu should calculate synonymous usage correctly"""
    usage = CodonUsage({'UUU':3,'UUC':1,'ACA':1})
    #the return value is ignored: results are read back from usage itself
    usage.rscu()
    expected = (
        ('UUU', 0.75),
        ('UUC', 0.25),
        ('ACA', 1),
        ('GGG', 0),
    )
    for codon, value in expected:
        self.assertEqual(usage[codon], value)
def test_bases(self):
    """CodonUsage bases should count bases correctly"""
    seq = 'UUUCCCUAGCCCGGGAA'
    usage = CodonUsage(seq)
    #unpurged: same base counts as counting the raw sequence directly
    all_bases = usage.bases()
    self.assertEqual(all_bases, BaseUsage(seq))
    #purge_unwanted should get rid of bad codons (here the expected
    #result drops 'UAG' and the trailing 'AA')
    kept_bases = usage.bases(purge_unwanted=True)
    self.assertEqual(kept_bases, BaseUsage('UUUCCCCCCGGG'))
def test_pr2bias(self):
    """CodonUsage pr2bias should give correct ratios."""
    #equal base freqs should give 0.5 for each of the six ratios
    c = EqualBases.codons()
    b = c.pr2bias('UU')
    self.assertEqual(len(b), 6)
    self.assertEqual(b, tuple([.5]*6))

    #unequal third-position counts in the 'AC' block
    c = CodonUsage()
    c['ACU'] = 10
    c['ACC'] = 5
    c['ACA'] = 15
    c['ACG'] = 20
    #float literals keep the expected ratios correct even without
    #"from __future__ import division" (int/int would truncate to 0).
    #NOTE(review): the values match G/(G+C), A/(A+U), G/(G+A), C/(C+U),
    #G/(G+U), C/(C+A) for the counts above -- confirm against pr2bias.
    self.assertEqual(c.pr2bias('AC'),
        (20.0/25, 15.0/25, 20.0/35, 5.0/15, 20.0/30, 5.0/20))
def test_aminoAcids(self):
    """CodonUsage aminoAcids should correctly count amino acids"""
    codon_counts = {'UUC':5, 'AUA':10, 'AUG':10, 'CGC':3, 'AGG':2, 'XYZ':8,
        'UAA':2, 'UGA':1}
    #expected amino acid counts under genetic codes 1 and 2 respectively
    standard_counts = {'F':5,'I':10,'M':10,'R':5,'*':3,'X':8}
    mt_counts = {'F':5,'I':0,'M':20,'R':3,'*':4,'W':1,'X':8}

    usage = CodonUsage(codon_counts, "test")
    self.assertEqual(usage.Info, 'test')
    #supplied codons keep their counts; everything else must be 0
    for codon, count in usage.items():
        if codon not in codon_counts:
            self.assertEqual(count, 0)
        else:
            self.assertEqual(count, codon_counts[codon])

    observed = usage.aminoAcids()
    self.assertEqual(observed, AminoAcidUsage(dict(standard_counts)))

    #check that it works with a different genetic code
    usage.GeneticCode = GeneticCodes['2']
    observed = usage.aminoAcids()
    self.assertEqual(observed, AminoAcidUsage(dict(mt_counts)))

    #check that it works if a genetic code is supplied explicitly
    usage.GeneticCode = GeneticCodes[1]
    observed = usage.aminoAcids()
    self.assertEqual(observed, AminoAcidUsage(dict(standard_counts)))
    observed_mt = usage.aminoAcids(2)
    self.assertEqual(observed_mt, AminoAcidUsage(dict(mt_counts)))

    #check that we held onto the info object through the above
    self.assertEqual(observed_mt.Info, 'test')
def test_aminoAcids(self):
    """BaseUsage aminoAcids should give the same results as the codons"""
    #expected codon freqs: product of base freqs A=.6, U=.4 per position
    base_freqs = (('A', .6), ('U', .4))
    codon_freqs = {}
    for b1, f1 in base_freqs:
        for b2, f2 in base_freqs:
            for b3, f3 in base_freqs:
                codon_freqs[b1 + b2 + b3] = f1 * f2 * f3
    from_codons = CodonUsage(codon_freqs)

    bases = BaseUsage({'a':3, 'T':2, 'X':1})
    self.assertEqual(bases.aminoAcids(), from_codons.aminoAcids())

    #check that the genetic code is passed through correctly: with a code
    #built from 64 'G's the only amino acid in the result should be 'G'
    only_gly = GeneticCode('G'*64)
    self.assertEqual(bases.aminoAcids(only_gly), AminoAcidUsage({'G':1}))
def test_fingerprint(self):
    """CodonUsage fingerprint should give correct ratios."""
    usage = EqualBases.codons()
    #with equal base freqs the default result is eight identical rows
    #followed by one final row
    block_rows = [[.5, .5, .125] for _ in range(8)]
    with_mean = usage.fingerprint()
    self.assertEqual(len(with_mean), 9)
    self.assertEqual(with_mean, block_rows + [[.5, .5, 1]])

    #should be able to omit mean...
    without_mean = usage.fingerprint(include_mean=False)
    self.assertEqual(without_mean, block_rows)

    #...or use all doublets
    all_blocks = usage.fingerprint(include_mean=False, which_blocks='all')
    self.assertEqual(len(all_blocks), 16)

    #...or do just the non-quartet ones
    split_blocks = usage.fingerprint(include_mean=False,
        which_blocks='split')
    self.assertEqual(len(split_blocks), 6)

    #check that it doesn't fail on an empty codon usage
    empty_print = CodonUsage('').fingerprint()
    self.assertEqual(empty_print[0], [0.5, 0.5, 0])
def test_codons(self):
    """AminoAcidUsage codons should return most likely codon freqs"""
    #(amino acid string, codon string whose normalized usage is expected)
    default_code_cases = (
        ('GGG', 'GGUGGCGGAGGG'),
        ('D', 'GAUGAC'),
        ('GDDFMM', 'GGUGGCGGAGGG'+'GAUGAC'*4+'UUUUUC'*2+'AUG'*8),
        ('II*', 'AUUAUCAUA'*2+'UAAUAGUGA'),
    )
    for aa_seq, codon_seq in default_code_cases:
        aa_usage = AminoAcidUsage(aa_seq)
        expected = CodonUsage(codon_seq)
        expected.normalize()
        self.assertEqual(aa_usage.codons(), expected)

    #check that it works with a nonstandard code
    odd_code = GeneticCode('A'*4+'C'*28+'G'*32)
    aa_usage = AminoAcidUsage('AAA')
    expected = CodonUsage('UUUUUCUUAUUG')
    expected.normalize()
    self.assertEqual(aa_usage.codons(odd_code), expected)

    #check that it works with unequal codon frequencies: each expected
    #value is (codon share within its amino acid) * (amino acid freq)
    unequal = CodonUsage({'GGU':5,'GGC':2,'GGA':2,'GGG':1,'UUU':3,'UUC':1})
    aa_usage = AminoAcidUsage('GFFF')
    expected_freqs = {
        'GGU': 0.5*0.25,
        'GGC': 0.2*0.25,
        'GGA': 0.2*0.25,
        'GGG': 0.1*0.25,
        'UUU': 0.75*0.75,
        'UUC': 0.25*0.75,
    }
    observed = aa_usage.codons(codon_usage=unequal)
    for codon, freq in observed.items():
        #codons not listed above are expected to have zero frequency
        self.assertFloatEqual(freq, expected_freqs.get(codon, 0))
def test_codons(self):
    """CodonUsage codons should return same object"""
    u = CodonUsage('abc')
    c = u.codons()
    #identity, not equality: codons() must hand back u itself. Use a
    #TestCase assertion so the check is not stripped under python -O.
    self.assertTrue(u is c)
def test_init_string(self):
    """CodonUsage should count codons in string"""
    usage = CodonUsage('UUUCCCUUUUUUGA')
    #the string is read in triplets; the trailing 'GA' is counted too
    raw_counts = CodonUsage({'UUU':3, 'CCC':1, 'GA':1})
    self.assertEqual(usage, raw_counts)
    #after normalizing in place, only the two full codons are expected
    usage.normalize()
    normalized = CodonUsage({'UUU':0.75, 'CCC':0.25})
    self.assertEqual(usage, normalized)