def mainKmerFinder(_seq, _k, _d):
    """Return the top-scoring k-mers that literally occur in ``_seq``.

    The candidates come from ``kmer_finder_mismatches(_seq, _k, _d)``; a
    candidate is kept only if it equals some length-``_k`` window of
    ``_seq`` or the result of ``reverse()`` applied to such a window.

    Args:
        _seq: Sequence to scan. Windows are used as set members, so slices
            of it must be hashable (e.g. a string).
        _k: Window (k-mer) length.
        _d: Mismatch budget, forwarded unchanged to
            ``kmer_finder_mismatches``.

    Returns:
        A list of the surviving candidates, in the order
        ``kmer_finder_mismatches`` returned them.
    """
    kmers = kmer_finder_mismatches(_seq, _k, _d)

    # A set gives O(1) membership tests; the collection is only ever used
    # to answer "does this candidate occur in the sequence?".
    seq_kmers = set()
    for start in range(len(_seq) - _k + 1):
        window = _seq[start:start + _k]
        seq_kmers.add(window)
        # NOTE(review): reverse() is defined elsewhere; presumably it maps a
        # k-mer to its reverse(-complement) strand form -- confirm.
        seq_kmers.add(reverse(window))

    return [kmer for kmer in kmers if kmer in seq_kmers]
def kmer_finder_mismatches(seq, k, d):
    """Return the candidate k-mers with the highest accumulated count.

    Every length-``k`` window of ``seq`` is tallied. For each distinct
    window, every distinct value produced by ``mutations(window, d)`` and,
    separately, by ``mutations(reverse(window), d)`` has its score raised by
    that window's frequency. The candidates sharing the maximum score are
    returned.

    Args:
        seq: Sequence to scan; its slices are used as dictionary keys.
        k: Window (k-mer) length.
        d: Mismatch budget, passed straight through to ``mutations``.

    Returns:
        A sorted list of the candidates with the maximum score, or an empty
        list when no candidate exists (for example when ``seq`` is shorter
        than ``k``).
    """
    # Frequency of each distinct window in the sequence.
    kmer_freq = defaultdict(int)
    for start in range(len(seq) - k + 1):
        kmer_freq[seq[start:start + k]] += 1

    mismatch_count = defaultdict(int)
    for kmer, freq in kmer_freq.items():
        # NOTE(review): mutations() and reverse() are defined elsewhere;
        # presumably they give the d-mismatch neighbourhood and the
        # reverse(-complement) strand respectively -- confirm.
        # set() deduplicates within one call so a neighbour is credited at
        # most once per strand; the two strands are credited independently,
        # so a neighbour present in both receives ``freq`` twice.
        for neighbour in set(mutations(kmer, d)):
            mismatch_count[neighbour] += freq
        for neighbour in set(mutations(reverse(kmer), d)):
            mismatch_count[neighbour] += freq

    # max() raises ValueError on an empty collection; no candidates simply
    # means there is nothing to report.
    if not mismatch_count:
        return []

    max_count = max(mismatch_count.values())
    return sorted(
        kmer for kmer, count in mismatch_count.items() if count == max_count
    )