Beispiel #1
0
 def test_four_kmers(self):
     """Check the counter built for 'AAAAATTTTT' with k=4 and max_kmer_count=5.

     The expectation is that each of the five distinct 4-mers of the
     sequence appears as a key mapped to 0.
     """
     expected_counter = dict.fromkeys(
         ('AAAA', 'AAAT', 'AATT', 'ATTT', 'TTTT'), 0
     )
     kmers = Kmers('AAAAATTTTT', 4)
     observed_counter = kmers.get_all_kmers_counter(max_kmer_count=5)
     self.assertEqual(observed_counter, expected_counter)
Beispiel #2
0
	def sequence_kmers(self):
		"""Build a k-mer counter for every sequence in the FASTA file.

		Parses ``self.filename`` as FASTA and creates a ``Kmers`` object with
		``self.k`` for each record. When ``self.divisible_by_3`` is truthy,
		records whose sequence length is not a multiple of 3 are skipped and a
		warning is logged through ``self.logger``.

		Returns:
			dict: maps each record id to the value returned by
			``Kmers.get_all_kmers_counter()`` for that record's sequence.
		"""
		kmer_to_sequences = {}
		for record in SeqIO.parse(self.filename, "fasta"):
			if self.divisible_by_3 and len(record.seq) % 3 != 0:
				self.logger.warning("Excluding gene as it is not divisible by 3:"+record.id)
				continue

			# We assume here that the sequence name is unique in the FASTA file;
			# a repeated id overwrites the entry stored for the earlier record.
			kmers = Kmers(str(record.seq), self.k)
			kmer_to_sequences[record.id] = kmers.get_all_kmers_counter()

		return kmer_to_sequences
Beispiel #3
0
 def test_short_sequence(self):
     """Check that a 1-base sequence with k=10 gives an empty counter."""
     observed_counter = Kmers('A', 10).get_all_kmers_counter()
     self.assertEqual(observed_counter, {})