def kmer_kernel(seq, k, canonical=True):
    """Build a normalized k-mer count vector for a sequence.

    Every length-``k`` window of ``seq`` that contains no ``'N'`` is counted.
    When ``canonical`` is true, the reverse complement of each counted window
    is counted as well, and the combined table is then passed through
    ``dna.canonical_kmers``.

    Args:
        seq: Sequence string to scan.
        k: Window (k-mer) length.
        canonical: Whether to add reverse-complement counts and collapse the
            table with ``dna.canonical_kmers``.

    Returns:
        dict mapping each k-mer to its count divided by the sum of the
        squared counts. Empty when no window qualifies.
    """
    kmer_counts = {}
    seq_len = len(seq)

    if canonical:
        # Reverse-complement the whole sequence once instead of per window.
        seq_rc = dna.rc(seq)

    for i in range(seq_len - k + 1):
        kmer = seq[i:i + k]
        if 'N' in kmer:
            continue

        kmer_counts[kmer] = kmer_counts.get(kmer, 0) + 1

        if canonical:
            # The reverse complement of window i sits at the mirrored
            # position of seq_rc. Slicing seq_rc at i instead would pair
            # this window's 'N' check with a different window's k-mer.
            # Assumes dna.rc preserves length -- TODO confirm.
            rc_start = seq_len - k - i
            kmer_rc = seq_rc[rc_start:rc_start + k]
            kmer_counts[kmer_rc] = kmer_counts.get(kmer_rc, 0) + 1

    if canonical:
        kmer_counts = dna.canonical_kmers(kmer_counts)

    # normalize
    # NOTE(review): the divisor is the sum of squared counts, not its square
    # root, so the result is not a unit-L2-norm vector -- confirm intended.
    kmer_sum = float(sum(np.square(list(kmer_counts.values()))))

    return {kmer: count / kmer_sum for kmer, count in kmer_counts.items()}
def kmer_kernel(seq, k, canonical=True):
    """Return normalized k-mer counts for ``seq``.

    Windows of length ``k`` containing ``'N'`` are ignored. With
    ``canonical`` set, each counted window also contributes a count for its
    reverse complement, and the table is collapsed by
    ``dna.canonical_kmers`` before normalization.

    Args:
        seq: Sequence string to scan.
        k: k-mer length.
        canonical: Count reverse complements too and collapse the table with
            ``dna.canonical_kmers``.

    Returns:
        dict of k-mer -> count / (sum of squared counts); empty if no window
        was counted.
    """
    n = len(seq)
    # Only needed (and only computed) on the canonical path.
    seq_rc = dna.rc(seq) if canonical else None

    kmer_counts = {}
    for start in range(n - k + 1):
        stop = start + k
        kmer = seq[start:stop]
        if 'N' in kmer:
            continue

        kmer_counts[kmer] = kmer_counts.get(kmer, 0) + 1

        if canonical:
            # Take the reverse complement of *this* window: in seq_rc it
            # spans [n - stop, n - start). Using [start, stop) would pick a
            # different window that never went through the 'N' check above.
            # Assumes dna.rc returns a same-length string -- TODO confirm.
            kmer_rc = seq_rc[n - stop:n - start]
            kmer_counts[kmer_rc] = kmer_counts.get(kmer_rc, 0) + 1

    if canonical:
        kmer_counts = dna.canonical_kmers(kmer_counts)

    # normalize
    # NOTE(review): divides by the sum of squares rather than the L2 norm
    # (its square root) -- verify this is the intended scaling.
    kmer_sum = float(sum(np.square(list(kmer_counts.values()))))

    vec = {}
    for kmer, count in kmer_counts.items():
        vec[kmer] = count / kmer_sum

    return vec
Exemple #3
0
 def __init__(self, header, seq, k):
     """Store the header and the canonical k-mer table of the sequence.

     Args:
         header: Value stored unchanged on ``self.header``.
         seq: Sequence string; upper-cased before counting.
         k: k-mer length handed to ``dna.count_kmers``.
     """
     self.header = header

     # Upper-case first, then count and collapse via the dna helpers.
     # The trailing True is passed through as-is; its meaning is defined
     # by dna.count_kmers.
     upper_seq = seq.upper()
     raw_counts = dna.count_kmers(k, upper_seq, True)
     self.kmers = dna.canonical_kmers(raw_counts)
Exemple #4
0
 def __init__(self, header, seq, k):
     """Record ``header`` and compute the canonical k-mer counts of ``seq``.

     Args:
         header: Kept verbatim as ``self.header``.
         seq: Sequence string; converted to upper case before counting.
         k: k-mer length forwarded to ``dna.count_kmers``.
     """
     self.header = header

     # Third positional argument (True) is forwarded unchanged to
     # dna.count_kmers; see that helper for its meaning.
     counted = dna.count_kmers(k, seq.upper(), True)
     canonical_counts = dna.canonical_kmers(counted)
     self.kmers = canonical_counts