Ejemplo n.º 1
0
def mainKmerFinder(_seq, _k, _d):
    """Return the top mismatch-tolerant k-mers that also occur in ``_seq``.

    The candidates come from ``kmer_finder_mismatches(_seq, _k, _d)``. A
    candidate is kept only if it equals some length-``_k`` window of ``_seq``
    or the result of applying ``reverse`` to such a window.

    Args:
        _seq: The sequence to scan; it is sliced into windows of length ``_k``.
        _k: Window (k-mer) length.
        _d: Mismatch budget, forwarded unchanged to ``kmer_finder_mismatches``.

    Returns:
        A list of the kept candidates, in the order ``kmer_finder_mismatches``
        returned them.

    NOTE(review): ``reverse`` is defined outside this block; presumably it
    returns the reverse complement of a k-mer as a hashable value (e.g. a
    string) -- confirm against its definition.
    """
    kmers = kmer_finder_mismatches(_seq, _k, _d)

    # A set gives O(1) membership tests for the filter below; the original
    # list made every lookup scan all windows.
    seq_kmers = set()
    # ``range`` (rather than ``xrange``) matches kmer_finder_mismatches and
    # runs on both Python 2 and Python 3.
    for i in range(len(_seq) - _k + 1):
        window = _seq[i:i + _k]
        seq_kmers.add(window)
        seq_kmers.add(reverse(window))

    return [kmer for kmer in kmers if kmer in seq_kmers]
Ejemplo n.º 2
0
def kmer_finder_mismatches(seq, k, d):
    """Return the highest-scoring k-mers under a mismatch-tolerant count.

    Every distinct length-``k`` window of ``seq`` is counted. Each window then
    adds its occurrence count to every distinct value produced by
    ``mutations(window, d)``, and again to every distinct value produced by
    ``mutations(reverse(window), d)``. A value appearing in both sets receives
    the window's count twice.

    Args:
        seq: The sequence to scan; it is sliced into windows of length ``k``.
        k: Window (k-mer) length.
        d: Mismatch budget, forwarded unchanged to ``mutations``.

    Returns:
        A sorted list of the k-mers whose accumulated score equals the
        maximum, or an empty list when no k-mer was scored (for example when
        ``seq`` is shorter than ``k``).

    NOTE(review): ``mutations`` and ``reverse`` are defined outside this
    block; presumably they yield the d-mismatch neighbourhood and the reverse
    complement respectively -- confirm against their definitions.
    """
    kmer_freq = defaultdict(int)
    for i in range(len(seq) - k + 1):
        kmer_freq[seq[i:i + k]] += 1

    mismatch_count = defaultdict(int)
    # ``items()`` (rather than ``iteritems()``) works on Python 2 and Python 3.
    for kmer, freq in kmer_freq.items():
        # De-duplicate each neighbourhood so one window contributes its
        # frequency at most once per neighbour and per orientation.
        for mismatch in set(mutations(kmer, d)):
            mismatch_count[mismatch] += freq
        for mismatch in set(mutations(reverse(kmer), d)):
            mismatch_count[mismatch] += freq

    # max() raises ValueError on an empty sequence; with nothing scored there
    # are simply no k-mers to report.
    if not mismatch_count:
        return []

    max_count = max(mismatch_count.values())
    return sorted(
        kmer for kmer, count in mismatch_count.items() if count == max_count
    )