def seq_not_lca_lce(self, seqs):
    """Concatenate every k-mer that is an LCE motif but not an LCA motif.

    Each sequence in *seqs* is split into k-mers of length ``self.k`` with
    ``tools_lc.seq_to_kmers``. A k-mer is kept when
    ``tools_lc.lce_motif(kmer, self.lce)`` is truthy and
    ``tools_lc.lca_motif(kmer, self.lca)`` is falsy.

    Args:
        seqs: Iterable of sequences to scan.

    Returns:
        A single string made of all selected k-mers, in the order they were
        encountered (sequence order, then k-mer order). Empty string when
        nothing is selected.
    """
    selected = []
    for seq in seqs:
        for kmer in tools_lc.seq_to_kmers(seq, self.k):
            # The LCE test runs first so the LCA test is only evaluated
            # for k-mers that already passed it.
            if tools_lc.lce_motif(kmer, self.lce) and not tools_lc.lca_motif(kmer, self.lca):
                selected.append(kmer)
    # Join once at the end instead of growing a string with += per k-mer.
    return ''.join(selected)
def count_lca_and_lce(self, seqs):
    """Count, per sequence, the k-mers that are both LCE and LCA motifs.

    Args:
        seqs: Iterable of sequences; each is split into k-mers of length
            ``self.k`` via ``tools_lc.seq_to_kmers``.

    Returns:
        A list with one integer per input sequence (same order): the number
        of k-mers for which both ``tools_lc.lce_motif(kmer, self.lce)`` and
        ``tools_lc.lca_motif(kmer, self.lca)`` are truthy.
    """

    def is_both(kmer):
        # LCE is checked first; LCA is only evaluated when LCE passes.
        return tools_lc.lce_motif(kmer, self.lce) and tools_lc.lca_motif(kmer, self.lca)

    return [
        sum(1 for kmer in tools_lc.seq_to_kmers(seq, self.k) if is_both(kmer))
        for seq in seqs
    ]
def count_lca_charge(self, seqs):
    """Count, per sequence, LCA-only k-mers containing K, R and E.

    A k-mer is counted when ``tools_lc.lca_motif(kmer, self.lca)`` is
    truthy, ``tools_lc.lce_motif(kmer, self.lce)`` is falsy, and the k-mer
    contains each of ``'K'``, ``'R'`` and ``'E'`` at least once.

    Args:
        seqs: Iterable of sequences; each is split into k-mers of length
            ``self.k`` via ``tools_lc.seq_to_kmers``.

    Returns:
        A list with one integer count per input sequence, in input order.
    """
    required_residues = ('K', 'R', 'E')
    per_seq_counts = []
    for seq in seqs:
        hits = sum(
            1
            for kmer in tools_lc.seq_to_kmers(seq, self.k)
            if tools_lc.lca_motif(kmer, self.lca)
            and not tools_lc.lce_motif(kmer, self.lce)
            and all(residue in kmer for residue in required_residues)
        )
        per_seq_counts.append(hits)
    return per_seq_counts
def seq_lca(self, seqs):
    """Collect the LCA-motif k-mers of each sequence and count them.

    Each sequence is split into k-mers of length ``self.k`` with
    ``tools_lc.seq_to_kmers``; a k-mer is kept when
    ``tools_lc.lca_motif(kmer, self.lca)`` is truthy.

    Args:
        seqs: Iterable of sequences to scan.

    Returns:
        A tuple ``(lca_counts, seq_kmers)`` of two lists aligned with the
        input order: ``lca_counts[i]`` is the number of kept k-mers of
        sequence *i*, and ``seq_kmers[i]`` is those k-mers concatenated
        into one string (``''`` when none matched).
    """
    lca_counts = []
    seq_kmers = []
    for seq in seqs:
        matches = [
            kmer
            for kmer in tools_lc.seq_to_kmers(seq, self.k)
            if tools_lc.lca_motif(kmer, self.lca)
        ]
        lca_counts.append(len(matches))
        # Join once per sequence instead of growing a string with +=.
        seq_kmers.append(''.join(matches))
    return lca_counts, seq_kmers
def create_lces(self, all_seqs):
    """Build LCE labels from the Shannon entropies found in the data set.

    Every k-mer (length ``self.k``) of every sequence is scored with
    ``tools_lc.shannon``. The distinct scores are sorted from highest to
    lowest, the single highest score is dropped, and the remainder are
    passed through ``self._round_up`` (per the original documentation this
    rounds up to the nearest 0.1 -- the helper is not shown here). The
    de-duplicated rounded scores, again sorted from highest to lowest, are
    handed to ``self.format_labels``.

    Args:
        all_seqs: Iterable of sequences to scan.

    Returns:
        Whatever ``self.format_labels`` returns for the descending list of
        distinct rounded scores.
    """
    entropies = set()
    for seq in all_seqs:
        entropies.update(
            tools_lc.shannon(kmer)
            for kmer in tools_lc.seq_to_kmers(seq, self.k)
        )

    descending = sorted(entropies, reverse=True)
    # The top score is skipped on purpose, exactly as before.
    # NOTE(review): the reason for excluding the maximum is not visible
    # from this method -- confirm against the callers.
    rounded = {self._round_up(score) for score in descending[1:]}
    return self.format_labels(sorted(rounded, reverse=True))
def process_seq(self, seq, k):
    """Plot the 'ST' and 'P' profiles of *seq* together with its motifs.

    The sequence is split into k-mers of length *k* and
    ``self.alph_fracs`` is evaluated for the residue groups ``'ST'`` and
    ``'P'``; both results are drawn as labelled line plots. The four
    coordinate sequences returned by ``self.get_motif_index(seq)`` are
    drawn as scatter points: the LCA pair in black and the LCE pair in
    red. The figure is then shown with ``plt.show()``.

    Other residue groups (e.g. ``'QN'``, ``'AG'``, ``'ED'``, ``'KR'``,
    ``'F'``, ``'R'``) can be plotted the same way by calling
    ``self.alph_fracs(kmers, group)`` and passing the result to
    ``plt.plot``.

    Args:
        seq: Sequence to visualise.
        k: K-mer length used for the line profiles.
            NOTE(review): ``get_motif_index`` receives only *seq*, so it
            presumably uses its own k-mer length -- confirm it matches *k*.

    Returns:
        None. The only effect is drawing and showing the figure.
    """
    kmers = tools_lc.seq_to_kmers(seq, k)
    # Only the profiles that are actually plotted are computed.
    st = self.alph_fracs(kmers, 'ST')
    p = self.alph_fracs(kmers, 'P')

    # Drawing order is kept (ST, motifs, P) so default colours and
    # layering stay the same.
    plt.plot(st, label='ST')
    lca_x, lca_y, lce_x, lce_y = self.get_motif_index(seq)
    plt.scatter(lca_x, lca_y, color='black', s=2)
    plt.scatter(lce_x, lce_y, color='red', s=2)
    plt.plot(p, label='P')
    plt.legend()
    plt.show()
def count_kmers(self, seqs):
    """Return the total number of k-mers across all sequences.

    Args:
        seqs: Iterable of sequences; each is split into k-mers of length
            ``self.k`` via ``tools_lc.seq_to_kmers``.

    Returns:
        The sum of ``len(...)`` of the k-mer collections of every sequence;
        ``0`` when *seqs* is empty.
    """
    return sum(len(tools_lc.seq_to_kmers(seq, self.k)) for seq in seqs)