def test_four_kmers(self):
    """Check the counter built for k=4 over 'AAAAATTTTT' with max_kmer_count=5.

    The expected mapping holds the five 4-mers listed below, each mapped to 0.
    """
    expected_counter = {
        'AAAA': 0,
        'AAAT': 0,
        'AATT': 0,
        'ATTT': 0,
        'TTTT': 0,
    }
    kmer_source = Kmers('AAAAATTTTT', 4)
    observed_counter = kmer_source.get_all_kmers_counter(max_kmer_count=5)
    self.assertEqual(observed_counter, expected_counter)
def sequence_kmers(self):
    """Build a mapping from FASTA record id to that record's k-mer counter.

    Parses ``self.filename`` as FASTA. When ``self.divisible_by_3`` is
    truthy, records whose sequence length is not a multiple of 3 are
    skipped and a warning is logged through ``self.logger``.

    Returns:
        dict: keyed by ``record.id``; each value is the result of
        ``Kmers(str(record.seq), self.k).get_all_kmers_counter()``.
    """
    kmer_to_sequences = {}
    for record in SeqIO.parse(self.filename, "fasta"):
        # Optionally drop sequences whose length is not a whole number of
        # 3-base units.
        if self.divisible_by_3 and len(record.seq) % 3 != 0:
            self.logger.warning("Excluding gene as it is not divisible by 3:"+record.id)
            continue

        kmers = Kmers(str(record.seq), self.k)
        # We assume here that the sequence name is unique in the FASTA file;
        # a repeated id would overwrite the earlier entry.
        kmer_to_sequences[record.id] = kmers.get_all_kmers_counter()
    return kmer_to_sequences
def test_short_sequence(self):
    """Check that a 1-base sequence with k=10 produces an empty counter."""
    kmer_source = Kmers('A', 10)
    observed_counter = kmer_source.get_all_kmers_counter()
    self.assertEqual(observed_counter, {})