def summary_allele(qbase):
    """Count the reads supporting each base and return the three most frequent.

    Only the upper-case symbols A, C, G, T and N are tallied (through
    ``qbase.count``); any other symbol present in ``qbase`` is ignored.

    NOTE(review): the previous summary said "the two most expressed", but the
    code has always asked for three entries. The count is kept at three so
    existing callers are unaffected -- confirm which was intended.

    Args:
        qbase: Object exposing ``count``, e.g. a string or list of base calls.

    Returns:
        A list of three ``(base, count)`` tuples ordered by descending count.
    """
    base_count = {base: qbase.count(base) for base in 'ACGTN'}
    # ``items()`` exists on both Python 2 and 3 (``iteritems()`` is 2-only),
    # and the explicit key avoids ``heapq.itemgetter``, which is not part of
    # heapq's documented interface and cannot be relied on across versions.
    call = heapq.nlargest(3, base_count.items(), key=lambda pair: pair[1])
    return call
def getResults(self, top_n=-1):
    """Return the highest-scoring motifs as (motif, score) pairs.

    If ``self.results`` is ``None``, ``self.findMotifs()`` is called first
    (presumably it populates ``self.results`` -- verify against its
    definition). Each result's ``getMotif()`` is then mapped to its
    ``getScore()``; when several results share a motif, the score of the
    last one wins.

    Args:
        top_n: Maximum number of pairs to return. The default ``-1`` asks
            for as many pairs as there are entries in ``self.results``,
            i.e. every motif.

    Returns:
        A list of ``(motif, score)`` tuples sorted by descending score.
    """
    if self.results is None:
        self.findMotifs()
    if top_n == -1:
        top_n = len(self.results)

    scores = {}
    for result in self.results:
        scores[result.getMotif()] = result.getScore()

    # ``items()`` works on Python 2 and 3 alike; ``iteritems()`` is 2-only.
    return nlargest(top_n, scores.items(), key=itemgetter(1))
def getResults(self, top_n=-1):
    """Return the highest-scoring motifs as (motif, score) pairs.

    If ``self.results`` is ``None``, ``self.findMotifs()`` is called first
    (presumably it populates ``self.results`` -- verify against its
    definition). Each result's ``getMotif()`` is then mapped to its
    ``getScore()``; when several results share a motif, the score of the
    last one wins.

    Args:
        top_n: Maximum number of pairs to return. The default ``-1`` asks
            for as many pairs as there are entries in ``self.results``,
            i.e. every motif.

    Returns:
        A list of ``(motif, score)`` tuples sorted by descending score.
    """
    if self.results is None:
        self.findMotifs()
    if top_n == -1:
        top_n = len(self.results)

    scores = {}
    for result in self.results:
        scores[result.getMotif()] = result.getScore()

    # ``items()`` works on Python 2 and 3 alike; ``iteritems()`` is 2-only.
    return nlargest(top_n, scores.items(), key=itemgetter(1))
def countKmers(self, top_n=-1):
    """Count every k-mer found in the sequences of ``self.seqs``.

    Each sequence returned by ``self.seqs.getAllSequences()`` is upper-cased
    and scanned with a sliding window of ``self.getKmerLength()`` characters.
    Windows that run past the end of a sequence are skipped.

    Args:
        top_n: With the default ``-1`` every k-mer is returned; otherwise
            only the ``top_n`` most frequent k-mers are kept.

    Returns:
        A dict mapping each k-mer to its number of occurrences.
    """
    # The k-mer length is read once instead of several times per window.
    k = self.getKmerLength()
    kmers = {}
    for seq in self.seqs.getAllSequences():
        seq = seq.upper()
        for start in range(len(seq)):
            kmer = seq[start:start + k]
            # Slices near the end of the sequence come back shorter than k.
            if len(kmer) == k:
                kmers[kmer] = kmers.get(kmer, 0) + 1

    if top_n == -1:
        return kmers
    # ``items()`` works on Python 2 and 3 alike; ``iteritems()`` is 2-only.
    return dict(nlargest(top_n, kmers.items(), key=itemgetter(1)))
def countKmers(self, top_n=-1):
    """Count every k-mer found in the sequences of ``self.seqs``.

    Each sequence returned by ``self.seqs.getAllSequences()`` is upper-cased
    and scanned with a sliding window of ``self.getKmerLength()`` characters.
    Windows that run past the end of a sequence are skipped.

    Args:
        top_n: With the default ``-1`` every k-mer is returned; otherwise
            only the ``top_n`` most frequent k-mers are kept.

    Returns:
        A dict mapping each k-mer to its number of occurrences.
    """
    # The k-mer length is read once instead of several times per window.
    k = self.getKmerLength()
    kmers = {}
    for seq in self.seqs.getAllSequences():
        seq = seq.upper()
        for start in range(len(seq)):
            kmer = seq[start:start + k]
            # Slices near the end of the sequence come back shorter than k.
            if len(kmer) == k:
                kmers[kmer] = kmers.get(kmer, 0) + 1

    if top_n == -1:
        return kmers
    # ``items()`` works on Python 2 and 3 alike; ``iteritems()`` is 2-only.
    return dict(nlargest(top_n, kmers.items(), key=itemgetter(1)))
def summary_allele(qbase):
    """Count the reads supporting each base and return the three most frequent.

    Only the upper-case symbols A, C, G, T and N are tallied (through
    ``qbase.count``); any other symbol present in ``qbase`` is ignored.

    NOTE(review): the previous summary said "the two most expressed", but the
    code has always asked for three entries. The count is kept at three so
    existing callers are unaffected -- confirm which was intended.

    Args:
        qbase: Object exposing ``count``, e.g. a string or list of base calls.

    Returns:
        A list of three ``(base, count)`` tuples ordered by descending count.
    """
    base_count = {base: qbase.count(base) for base in 'ACGTN'}
    # ``items()`` exists on both Python 2 and 3 (``iteritems()`` is 2-only),
    # and the explicit key avoids ``heapq.itemgetter``, which is not part of
    # heapq's documented interface and cannot be relied on across versions.
    call = heapq.nlargest(3, base_count.items(), key=lambda pair: pair[1])
    return call