def summary_allele(qbase):
    """Count the reads for each base and return the three most frequent.

    qbase: object with a ``count`` method (for example a string of base
        calls); only the upper-case symbols A, C, G, T and N are counted.
    returns: a list of three (base, count) tuples, highest count first.

    NOTE(review): an earlier docstring said "two most expressed", but the
    code requests three entries from ``nlargest``; that behaviour is kept.
    """
    base_count = {
        base: qbase.count(base) for base in ('A', 'C', 'G', 'T', 'N')
    }
    # dict.items() works on Python 2 and 3 (iteritems is Python 2 only), and
    # a plain key function avoids heapq.itemgetter, which is not part of
    # heapq's public API.
    return heapq.nlargest(3, base_count.items(), key=lambda pair: pair[1])
Ejemplo n.º 2
0
 def getResults(self, top_n = -1):
     """Return the highest-scoring motifs as (motif, score) pairs.

     Calls ``self.findMotifs()`` first when ``self.results`` is still None.

     top_n: maximum number of pairs to return; the default of -1 means
         one slot per entry in ``self.results`` (i.e. everything).
     returns: a list of (motif, score) tuples ordered by descending score,
         as produced by ``nlargest``.
     """
     if self.results is None:
         self.findMotifs()
     if top_n == -1:
         top_n = len(self.results)
     # Map each motif to its score; a motif that appears more than once
     # keeps the score of its last occurrence in self.results.
     scores = {}
     for result in self.results:
         scores[result.getMotif()] = result.getScore()
     # items() instead of the Python-2-only iteritems().
     return nlargest(top_n, scores.items(), key=itemgetter(1))
Ejemplo n.º 3
0
 def getResults(self, top_n=-1):
     """Return the highest-scoring motifs as (motif, score) pairs.

     Calls ``self.findMotifs()`` first when ``self.results`` is still None.

     top_n: maximum number of pairs to return; the default of -1 means
         one slot per entry in ``self.results`` (i.e. everything).
     returns: a list of (motif, score) tuples ordered by descending score,
         as produced by ``nlargest``.
     """
     if self.results is None:
         self.findMotifs()
     if top_n == -1:
         top_n = len(self.results)
     # Later duplicates of a motif overwrite the earlier score.
     motif_scores = {}
     for entry in self.results:
         motif_scores[entry.getMotif()] = entry.getScore()
     # items() replaces iteritems(), which no longer exists on Python 3.
     return nlargest(top_n, motif_scores.items(), key=itemgetter(1))
Ejemplo n.º 4
0
 def countKmers(self, top_n = -1):
     """Count every k-mer in the sequences from ``self.seqs``.

     Each sequence returned by ``self.seqs.getAllSequences()`` is
     upper-cased and scanned with a sliding window whose width is
     ``self.getKmerLength()``; windows shorter than that width are ignored.

     top_n: when -1 (the default) the full count dictionary is returned;
         otherwise only the ``top_n`` most frequent k-mers are kept.
     returns: a dict mapping k-mer -> number of occurrences.
     """
     # The k-mer length does not change during the scan, so fetch it once.
     k = self.getKmerLength()
     kmers = {}
     for seq in self.seqs.getAllSequences():
         seq = seq.upper()
         for start in range(len(seq)):
             kmer = seq[start:start + k]
             if len(kmer) != k:
                 # Window ran off the end; later windows do too.
                 break
             kmers[kmer] = kmers.get(kmer, 0) + 1
     if top_n == -1:
         return kmers
     # items() replaces the Python-2-only iteritems().
     return dict(nlargest(top_n, kmers.items(), key=itemgetter(1)))
Ejemplo n.º 5
0
 def countKmers(self, top_n=-1):
     """Count every k-mer in the sequences from ``self.seqs``.

     Each sequence returned by ``self.seqs.getAllSequences()`` is
     upper-cased and scanned with a sliding window whose width is
     ``self.getKmerLength()``; windows shorter than that width are ignored.

     top_n: when -1 (the default) the full count dictionary is returned;
         otherwise only the ``top_n`` most frequent k-mers are kept.
     returns: a dict mapping k-mer -> number of occurrences.
     """
     # Look the window width up once instead of on every iteration.
     width = self.getKmerLength()
     counts = {}
     for seq in self.seqs.getAllSequences():
         seq = seq.upper()
         for offset in range(len(seq)):
             window = seq[offset:offset + width]
             if len(window) != width:
                 # Past the last full window; the rest are shorter still.
                 break
             counts[window] = counts.get(window, 0) + 1
     if top_n == -1:
         return counts
     # items() works on Python 2 and 3; iteritems() is Python 2 only.
     return dict(nlargest(top_n, counts.items(), key=itemgetter(1)))
def summary_allele(qbase):
    """Count the reads for each base and return the three most frequent.

    qbase: object with a ``count`` method (for example a string of base
        calls); only the upper-case symbols A, C, G, T and N are counted.
    returns: a list of three (base, count) tuples, highest count first.

    NOTE(review): an earlier docstring said "two most expressed", but the
    code requests three entries from ``nlargest``; that behaviour is kept.
    """
    base_count = {
        'A': qbase.count('A'),
        'C': qbase.count('C'),
        'G': qbase.count('G'),
        'T': qbase.count('T'),
        'N': qbase.count('N'),
    }
    # items() is available on Python 2 and 3 (iteritems is Python 2 only);
    # the lambda replaces heapq.itemgetter, which is not public heapq API.
    call = heapq.nlargest(3, base_count.items(), key=lambda pair: pair[1])
    return call