Example #1
0
 def col_consensus(col, prev_col=[], prev_char=[]):
     """Return the consensus residue for one alignment column.

     The residue with the highest count in ``col`` wins.  Ties are broken
     first by context (among rows whose previous-column character equals
     the previous consensus, which tied residue is most common) and then
     by the lowest value in ``bg_freqs``.

     NOTE: ``prev_col`` and ``prev_char`` are *deliberately* mutable
     defaults.  They are overwritten in place at the end of every call and
     so carry the last column and its consensus residue into the next
     call; the result of a tied column therefore depends on call order.
     A caller may pass its own lists to keep that state separate.

     Raises AssertionError if ``sequtils.aa_frequencies`` returns nothing
     for the column (per the message, an all-gap column).
     """
     # Count the amino acid types in this column
     aa_counts = sequtils.aa_frequencies(col)
     assert aa_counts, "Column is all gaps! That's not allowed."
     # Highest count in the column; .values() exists on Python 2 and 3
     # alike (the former .iteritems() call was Python-2 only).
     best_score = max(aa_counts.values())
     # Every residue sharing that top count
     ties = [aa for aa in aa_counts if aa_counts[aa] == best_score]
     if len(ties) > 1:
         # Breaker #1: most common after the prev. consensus char
         # Resolve a tied col by restricting to rows where the preceding
         # char is the consensus type for that (preceding) col
         if prev_char and prev_col:
             mc_next = Counter(
                 b for a, b in zip(prev_col, col)
                 if a == prev_char[0] and b in ties
             ).most_common()
             if mc_next:
                 # Keep only the residues sharing the top contextual count;
                 # if no row matched, the original tie list is left as is.
                 top_count = mc_next[0][1]
                 ties = [aa for aa, count in mc_next if count == top_count]
         if len(ties) > 1:
             # Breaker #2: lowest overall residue frequency
             ties.sort(key=lambda aa: bg_freqs[aa])
     best_char = ties[0]
     # Save values for tie-breaker #1 (in place, so the state persists)
     prev_col[:] = col
     # Wrap in a list: slice-assigning a bare string would split a
     # multi-character key into separate characters.
     prev_char[:] = [best_char]
     return best_char
Example #2
0
 def col_consensus(col):
     """Pick the consensus character for a single column.

     Each residue reported by ``sequtils.aa_frequencies`` is scored with
     the function returned by ``entropy_func(col_freqs, bg_freqs)``, and
     the highest-scoring residue is returned.  If selecting the maximum
     raises ValueError, the gap character '-' is returned instead.
     """
     freqs = sequtils.aa_frequencies(col)
     score_of = entropy_func(freqs, bg_freqs)
     try:
         winner = max(freqs, key=score_of)
     except ValueError:
         # Probably a column of gaps, leaving nothing to score.
         # TODO: make this case easier to debug
         return '-'
     return winner
Example #3
0
 def col_consensus(col):
     """Return the residue in ``col`` with the highest entropy-based score.

     The scoring function comes from ``entropy_func``, built from this
     column's residue frequencies and the background frequencies
     ``bg_freqs``.  Any exception raised by ``max`` or by the scoring
     function propagates to the caller.
     """
     freqs = sequtils.aa_frequencies(col)
     return max(freqs, key=entropy_func(freqs, bg_freqs))