def kmer_kernel(seq, k, canonical=True):
    """Return a normalized k-mer count profile of ``seq``.

    Every length-``k`` window of ``seq`` is counted, skipping windows that
    contain the character ``'N'``.  When ``canonical`` is true, the window at
    the same offset of ``dna.rc(seq)`` is counted as well, and the combined
    counts are passed through ``dna.canonical_kmers`` before normalization.

    Args:
        seq: Sequence string to profile.
        k: Window (k-mer) length.
        canonical: If true, also count windows of ``dna.rc(seq)`` and hand
            the counts to ``dna.canonical_kmers``.

    Returns:
        dict mapping each k-mer to its count divided by the sum of the
        squared counts.  Empty when no window qualifies (for example when
        ``seq`` is shorter than ``k``).
    """
    seq_rc = dna.rc(seq) if canonical else None

    kmer_counts = {}
    for i in range(len(seq) - k + 1):
        windows = [seq[i:i + k]]
        if canonical:
            # Offset i of the reverse complement is a different stretch of
            # the sequence than offset i of the forward strand, so it has to
            # be screened for 'N' on its own content below.
            windows.append(seq_rc[i:i + k])

        for kmer in windows:
            if 'N' in kmer:
                continue
            kmer_counts[kmer] = kmer_counts.get(kmer, 0) + 1

    if canonical:
        kmer_counts = dna.canonical_kmers(kmer_counts)

    # NOTE(review): the divisor is the sum of squared counts, not its square
    # root, so the result is not a Euclidean unit vector.  Kept as is because
    # callers may rely on the current values -- confirm the intent.
    kmer_sum = float(sum(count * count for count in kmer_counts.values()))

    return {kmer: count / kmer_sum for kmer, count in kmer_counts.items()}
def kmer_kernel(seq, k, canonical=True):
    """Return a normalized k-mer count profile of ``seq``.

    Every length-``k`` window of ``seq`` is counted, skipping windows that
    contain the character ``'N'``.  When ``canonical`` is true, the window at
    the same offset of ``dna.rc(seq)`` is counted as well, and the combined
    counts are passed through ``dna.canonical_kmers`` before normalization.

    Args:
        seq: Sequence string to profile.
        k: Window (k-mer) length.
        canonical: If true, also count windows of ``dna.rc(seq)`` and hand
            the counts to ``dna.canonical_kmers``.

    Returns:
        dict mapping each k-mer to its count divided by the sum of the
        squared counts.  Empty when no window qualifies (for example when
        ``seq`` is shorter than ``k``).
    """
    seq_rc = dna.rc(seq) if canonical else None

    kmer_counts = {}
    for i in range(len(seq) - k + 1):
        windows = [seq[i:i + k]]
        if canonical:
            # Offset i of the reverse complement is a different stretch of
            # the sequence than offset i of the forward strand, so it has to
            # be screened for 'N' on its own content below.
            windows.append(seq_rc[i:i + k])

        for kmer in windows:
            if 'N' in kmer:
                continue
            kmer_counts[kmer] = kmer_counts.get(kmer, 0) + 1

    if canonical:
        kmer_counts = dna.canonical_kmers(kmer_counts)

    # NOTE(review): the divisor is the sum of squared counts, not its square
    # root, so the result is not a Euclidean unit vector.  Kept as is because
    # callers may rely on the current values -- confirm the intent.
    kmer_sum = float(sum(count * count for count in kmer_counts.values()))

    return {kmer: count / kmer_sum for kmer, count in kmer_counts.items()}
def __init__(self, header, seq, k):
    """Store the header and the k-mer counts of the upper-cased sequence.

    Args:
        header: Value stored unchanged on ``self.header``.
        seq: Sequence string; upper-cased before counting.
        k: k-mer length forwarded to ``dna.count_kmers``.
    """
    self.header = header

    # NOTE(review): the meaning of the trailing True flag is defined by
    # dna.count_kmers, which is not visible here -- confirm before changing.
    raw_counts = dna.count_kmers(k, seq.upper(), True)

    # Presumably merges reverse-complement pairs; verify against dna.
    self.kmers = dna.canonical_kmers(raw_counts)
def __init__(self, header, seq, k):
    """Store the header and the k-mer counts of the upper-cased sequence.

    Args:
        header: Value stored unchanged on ``self.header``.
        seq: Sequence string; upper-cased before counting.
        k: k-mer length forwarded to ``dna.count_kmers``.
    """
    self.header = header

    # NOTE(review): the meaning of the trailing True flag is defined by
    # dna.count_kmers, which is not visible here -- confirm before changing.
    upper_seq = seq.upper()
    counted = dna.count_kmers(k, upper_seq, True)

    # Presumably merges reverse-complement pairs; verify against dna.
    self.kmers = dna.canonical_kmers(counted)