def map_kmers_to_read(self, sequence, read, candidate_gene_names):
    """Match one read's k-mers against the reference k-mers and apply them to genes.

    Steps, in order:
      1. Extract k-mers from ``sequence`` via ``Kmers.get_all_kmers``.
      2. Bin them against ``self.fasta_kmers`` with ``put_kmers_in_read_bins``.
      3. Ask ``Blocks`` for the largest block of hits and adjust its start
         and end (the end is adjusted against the sequence length).
      4. Build the k-mers for that block and pass them, together with
         ``candidate_gene_names``, to ``apply_kmers_to_genes``.
      5. If ``self.filtered_reads_file`` is truthy, hand ``read`` and the
         adjusted block bounds to ``append_read_to_fastq_file``.

    Args:
        sequence: The read's sequence; only ``len()`` is taken here, the
            rest is delegated to ``Kmers``.
        read: The read record forwarded untouched to
            ``append_read_to_fastq_file``.
        candidate_gene_names: Forwarded untouched to ``apply_kmers_to_genes``.

    Returns:
        Whatever ``apply_kmers_to_genes`` returns (presumably a truthy value
        when the read matched a gene -- confirm against that method).
    """
    self.logger.info("Map k-mers to read")

    read_length = len(sequence)
    # Passed to put_kmers_in_read_bins as its ``end`` argument.
    last_position = read_length - self.k

    read_kmers = Kmers(
        sequence, self.k, self.homopolyer_compression
    ).get_all_kmers(max_kmer_count=self.max_kmer_count)

    # The middle element of the returned triple (a hit counter) is not
    # needed by this method.
    hits_per_position, _, kmer_hits = self.put_kmers_in_read_bins(
        read_length, last_position, self.fasta_kmers, read_kmers
    )

    block_finder = Blocks(
        self.k, self.min_block_size, self.max_gap, self.margin
    )
    raw_start, raw_end = block_finder.find_largest_block(hits_per_position)
    block_start = block_finder.adjust_block_start(raw_start)
    block_end = block_finder.adjust_block_end(raw_end, read_length)

    kmers_in_block = self.create_kmers_for_block(
        block_start, block_end, kmer_hits
    )
    matched = self.apply_kmers_to_genes(
        self.fasta_obj, kmers_in_block, candidate_gene_names
    )

    # Writing the read out is optional and happens after gene assignment.
    if self.filtered_reads_file:
        self.append_read_to_fastq_file(read, block_start, block_end)

    return matched
def test_two_blocks(self):
    # Two separate runs of hits should be reported as two blocks.
    # Hit vector by run length: 3 misses, 11 hits, 13 misses, 9 hits
    # (36 positions in total).
    hit_vector = [0] * 3 + [1] * 11 + [0] * 13 + [1] * 9
    finder = Blocks(7, 7, 2, 5)

    found = finder.find_all_blocks(hit_vector)

    # Expected blocks are [start, end) style pairs matching the two runs
    # of ones at indices 3..13 and 27..35.
    expected = [[3, 14], [27, 36]]
    self.assertEqual(found, expected)
def test_largest_block(self):
    # With an 11-hit run and a 9-hit run, the longer first run must win.
    # Hit vector by run length: 3 misses, 11 hits, 13 misses, 9 hits.
    hit_vector = [0] * 3 + [1] * 11 + [0] * 13 + [1] * 9
    finder = Blocks(7, 7, 2, 5)

    largest = finder.find_largest_block(hit_vector)

    # The result is compared against a tuple, not a list.
    self.assertEqual(largest, (3, 14))
def test_adjust_block_end(self):
    # Each case is (block_end, sequence_length, expected_result).
    # NOTE(review): the expected values are consistent with
    # min(block_end * 7 + 10, sequence_length) -- confirm against
    # Blocks.adjust_block_end.
    finder = Blocks(7, 0, 0, 10)
    cases = (
        (10, 90, 80),
        (10, 80, 80),
        (10, 50, 50),
    )
    for block_end, seq_length, expected in cases:
        self.assertEqual(
            finder.adjust_block_end(block_end, seq_length), expected
        )
def test_adjust_block_start(self):
    # Each case is (block_start, expected_result).
    # NOTE(review): the expected values are consistent with
    # max(block_start * 7 - 10, 0) -- confirm against
    # Blocks.adjust_block_start.
    finder = Blocks(7, 0, 0, 10)
    cases = (
        (10, 60),
        (1, 0),
    )
    for block_start, expected in cases:
        self.assertEqual(finder.adjust_block_start(block_start), expected)
def test_merging_blocks(self):
    # Four input blocks: the first two are 2 apart, as are the last two,
    # while the gap between the pairs is 6.
    finder = Blocks(7, 7, 3, 5)
    unmerged = [[4, 8], [10, 14], [20, 24], [26, 30]]

    merged = finder.merge_blocks(unmerged)

    # The output keeps one entry per input block; each entry carries the
    # bounds of the merged block it ended up in, so merged neighbours
    # appear as duplicates.
    expected = [[4, 14], [4, 14], [20, 30], [20, 30]]
    self.assertEqual(merged, expected)