def seq_not_lca_lce(self, seqs):
     """
     Concatenate every k-mer that passes the LCE test but fails the LCA test.

     Each sequence in `seqs` is split with
     `tools_lc.seq_to_kmers(seq, self.k)`. A k-mer is kept when
     `tools_lc.lce_motif(kmer, self.lce)` is truthy and
     `tools_lc.lca_motif(kmer, self.lca)` is falsy.

     Returns a single string: the kept k-mers from all sequences placed
     back to back, in the order they were encountered, with no separator.
     """
     kept = []
     for seq in seqs:
         for kmer in tools_lc.seq_to_kmers(seq, self.k):
             # The LCA test is only evaluated for k-mers that pass LCE.
             if not tools_lc.lce_motif(kmer, self.lce):
                 continue
             if not tools_lc.lca_motif(kmer, self.lca):
                 kept.append(kmer)
     # Join once instead of growing a string with += in the nested loops.
     return ''.join(kept)
 def count_lca_and_lce(self, seqs):
     """
     Count, per sequence, the k-mers that pass both the LCE and LCA tests.

     Sequences are split with `tools_lc.seq_to_kmers(seq, self.k)`.

     Returns a list with one integer per entry of `seqs`, in input order.
     """
     def passes_both(kmer):
         # LCE is checked first; LCA is only evaluated when LCE passes.
         return (tools_lc.lce_motif(kmer, self.lce)
                 and tools_lc.lca_motif(kmer, self.lca))

     return [
         sum(1 for kmer in tools_lc.seq_to_kmers(seq, self.k)
             if passes_both(kmer))
         for seq in seqs
     ]
Exemple #3
0
 def count_lca_charge(self, seqs):
     """
     Count, per sequence, the LCA-only k-mers that contain K, R and E.

     A k-mer is counted when `tools_lc.lca_motif(kmer, self.lca)` is truthy,
     `tools_lc.lce_motif(kmer, self.lce)` is falsy, and the letters 'K', 'R'
     and 'E' all occur in it.

     Returns a list with one count per entry of `seqs`, in input order.
     """
     required = ('K', 'R', 'E')
     totals = []
     for seq in seqs:
         hits = 0
         for kmer in tools_lc.seq_to_kmers(seq, self.k):
             # Guard clauses keep the original test order: LCA, then LCE.
             if not tools_lc.lca_motif(kmer, self.lca):
                 continue
             if tools_lc.lce_motif(kmer, self.lce):
                 continue
             if all(letter in kmer for letter in required):
                 hits += 1
         totals.append(hits)
     return totals
 def seq_lca(self, seqs):
     """
     Collect the k-mers of each sequence that pass the LCA test.

     Sequences are split with `tools_lc.seq_to_kmers(seq, self.k)` and a
     k-mer matches when `tools_lc.lca_motif(kmer, self.lca)` is truthy.

     Returns a tuple `(lca_counts, seq_kmers)` of two lists with one entry
     per sequence, in input order: the number of matching k-mers, and those
     k-mers concatenated in order with no separator.
     """
     lca_counts = []
     seq_kmers = []
     for seq in seqs:
         matches = [
             kmer
             for kmer in tools_lc.seq_to_kmers(seq, self.k)
             if tools_lc.lca_motif(kmer, self.lca)
         ]
         # Derive both outputs from one list instead of keeping a counter
         # and growing a string with += for every match.
         lca_counts.append(len(matches))
         seq_kmers.append(''.join(matches))
     return lca_counts, seq_kmers
Exemple #5
0
 def create_lces(self, all_seqs):
     """
     Build LCE labels from the Shannon entropies observed in `all_seqs`.

     Every k-mer of every sequence (split with
     `tools_lc.seq_to_kmers(seq, self.k)`) is scored with
     `tools_lc.shannon`. The distinct scores, except the single largest one,
     are passed through `self._round_up` (described in the original notes as
     rounding up to the nearest 0.1), de-duplicated, sorted in descending
     order and handed to `self.format_labels`.

     Returns whatever `self.format_labels` returns for that list.
     """
     all_shannon = {
         tools_lc.shannon(kmer)
         for seq in all_seqs
         for kmer in tools_lc.seq_to_kmers(seq, self.k)
     }
     # [1:] drops the highest observed entropy before rounding.
     below_max = sorted(all_shannon, reverse=True)[1:]
     # De-duplicate and sort once after all scores are rounded, rather than
     # re-sorting the growing list on every loop iteration.
     new_scores = sorted(
         {self._round_up(score) for score in below_max}, reverse=True
     )
     return self.format_labels(new_scores)
Exemple #6
0
 def process_seq(self, seq, k):
     """
     Plot alphabet fractions and motif positions for one sequence.

     `seq` is split with `tools_lc.seq_to_kmers(seq, k)`; note that the `k`
     argument is used here, not `self.k`. The results of `self.alph_fracs`
     for the 'ST' and 'P' alphabets are drawn as labelled lines, and the
     coordinates returned by `self.get_motif_index(seq)` are overlaid as
     scatter points (LCA in black, LCE in red). The figure is displayed with
     `plt.show()`; nothing is returned.
     """
     kmers = tools_lc.seq_to_kmers(seq, k)
     # Every alphabet is still evaluated, in the original order, although
     # only ST and P are currently drawn.
     alphabets = ('QN', 'ST', 'AG', 'P', 'ED', 'KR', 'F', 'R')
     fracs = {
         alphabet: self.alph_fracs(kmers, alphabet) for alphabet in alphabets
     }

     plt.plot(fracs['ST'], label='ST')
     lca_x, lca_y, lce_x, lce_y = self.get_motif_index(seq)
     plt.scatter(lca_x, lca_y, color='black', s=2)
     plt.scatter(lce_x, lce_y, color='red', s=2)
     plt.plot(fracs['P'], label='P')
     plt.legend()
     plt.show()
 def count_kmers(self, seqs):
     """
     Return the total number of k-mers across all sequences.

     Each sequence is split with `tools_lc.seq_to_kmers(seq, self.k)` and the
     lengths of the resulting collections are added together.
     """
     return sum(len(tools_lc.seq_to_kmers(seq, self.k)) for seq in seqs)