def getKL(self, usage):
    """Return the Kullback-Leibler divergence (relative entropy) between
    a reference codon usage and the observed codon frequencies.

    The observed frequencies are obtained from
    ``Genomics.CalculateCodonFrequenciesFromCounts`` using
    ``self.mCodonCounts`` and ``self.mPseudoCounts``.  For every codon in
    ``self.mCodonCounts`` the term
    ``usage[codon] * log(usage[codon] / freqs[codon])`` (natural
    logarithm) is accumulated, i.e. the reference usage is the weighting
    distribution.

    Codons whose reference usage is zero contribute nothing to the sum
    (the usual ``0 * log(0) = 0`` convention) instead of raising a
    ``ValueError`` from ``math.log(0)``.

    :param usage: mapping from codon to its reference usage; must contain
        every codon present in ``self.mCodonCounts``.
    :return: the accumulated divergence (``0`` if there are no codons).
    :raises KeyError: if a codon is missing from ``usage``.
    :raises ZeroDivisionError: if an observed frequency is zero while the
        corresponding reference usage is not.
    """
    freqs = Genomics.CalculateCodonFrequenciesFromCounts(
        self.mCodonCounts, self.mPseudoCounts)

    e = 0
    for codon in self.mCodonCounts:
        reference = usage[codon]
        # A zero-probability reference codon carries no weight; skipping
        # it avoids evaluating log(0).
        if reference == 0:
            continue
        e += reference * math.log(reference / freqs[codon])
    return e
def getEntropy(self, usage=None):
    """Return the entropy of the observed codon frequencies in terms of
    a reference usage.

    This is also called conditional entropy or encoding cost.  For every
    codon in ``self.mCodonCounts`` the term
    ``freqs[codon] * log(usage[codon])`` (natural logarithm) is
    subtracted from the running total, where ``freqs`` comes from
    ``Genomics.CalculateCodonFrequenciesFromCounts`` applied to
    ``self.mCodonCounts`` and ``self.mPseudoCounts``.

    Original author's note: the result is the sum over 20 entropies, one
    for each amino acid (this presumably relies on the frequencies being
    normalised per amino acid by the ``Genomics`` helper -- not visible
    here).

    Codons with an observed frequency of zero are skipped: their weight
    is zero, so they cannot change the result, and skipping them avoids
    a ``ValueError`` from ``math.log(0)`` when the usage is zero as well.

    :param usage: mapping from codon to reference usage.  If ``None``
        (the default), the observed frequencies themselves are used, so
        the plain entropy is computed.
    :return: the accumulated entropy (``0`` if there are no codons).
    :raises KeyError: if a codon with non-zero frequency is missing from
        ``usage``.
    :raises ValueError: if ``usage`` is zero (or negative) for a codon
        with non-zero observed frequency.
    """
    freqs = Genomics.CalculateCodonFrequenciesFromCounts(
        self.mCodonCounts, self.mPseudoCounts)

    # Identity test: ``== None`` would be evaluated element-wise by
    # array-like usage objects.
    if usage is None:
        usage = freqs

    e = 0
    for codon in self.mCodonCounts:
        observed = freqs[codon]
        # 0 * log(x) contributes nothing; do not evaluate log at all.
        if observed == 0:
            continue
        e -= observed * math.log(usage[codon])
    return e
def updateProperties(self):
    """Refresh derived properties, including the codon frequencies.

    First delegates to ``SequencePropertiesCodons.updateProperties`` and
    then recomputes ``self.mCodonFrequencies`` from ``self.mCodonCounts``
    via ``Genomics.CalculateCodonFrequenciesFromCounts`` (called without
    pseudocounts).
    """
    # Let the parent class bring its own state up to date first.
    SequencePropertiesCodons.updateProperties(self)

    frequencies = Genomics.CalculateCodonFrequenciesFromCounts(
        self.mCodonCounts)
    self.mCodonFrequencies = frequencies