def test_bam_file_to_region_fasta(self):
    '''Test bam_file_to_region_fasta

    Calls mapping.bam_file_to_region_fasta on the sorted test BAM for
    reference 'ref' with start=42 and end=142, then checks that the file
    it wrote compares equal (filecmp.cmp) to the expected FASTA stored
    in data_dir.
    '''
    tmp = 'tmp.to_fasta.fa'
    bam = os.path.join(data_dir, 'mapping_test.smalt.out.sorted.bam')
    expected = os.path.join(data_dir, 'mapping_test.bam_to_region.fasta')
    try:
        mapping.bam_file_to_region_fasta(bam, tmp, 'ref', start=42, end=142)
        self.assertTrue(filecmp.cmp(expected, tmp))
    finally:
        # Clean up even when the call or the comparison fails, so a failing
        # run does not leave the output file behind. The existence check
        # stops a missing file from masking the original error.
        if os.path.exists(tmp):
            os.unlink(tmp)
def _make_new_seed(self, seed_name):
    '''Build a new seed for seed_name and write it to a FASTA file.

    Steps:
      1. Write reads from self.bam_file for seed_name to
         <tmpdir>/out.<seed_name>.reads_1.fa. When the original seed is
         longer than self.seed_stop_length, only a window of about
         seed_stop_length centred on the middle of the original seed is
         requested; otherwise start and end are passed as None.
      2. Construct a seed.Seed from those reads.
      3. Extend it with self.reads1 and self.reads2.
      4. Write the result as record 'seed.<seed_name>' to
         <tmpdir>/out.<seed_name>.fa.

    If the constructed Seed has length zero, a warning is printed and
    nothing is written.

    Args:
        seed_name: key into self.original_seeds; also passed as the
            reference name to mapping.bam_file_to_region_fasta.

    Returns:
        None.
    '''
    if self.verbose:
        print('Making new seed for', seed_name, ' ... start')

    tmp_prefix = os.path.join(self.tmpdir, 'out')
    seed_reads = tmp_prefix + '.' + seed_name + '.reads_1.fa'
    original_length = len(self.original_seeds[seed_name])

    if original_length > self.seed_stop_length:
        # Window of roughly seed_stop_length centred on the middle of the
        # original seed.
        start = int(0.5 * original_length - 0.5 * self.seed_stop_length)
        end = int(0.5 * original_length + 0.5 * self.seed_stop_length)
    else:
        start = None
        end = None

    if self.verbose:
        print('Making new seed for', seed_name, ' ... getting reads')
    mapping.bam_file_to_region_fasta(self.bam_file, seed_reads, seed_name, start, end)

    if self.verbose:
        print('Making new seed for', seed_name, ' ... finding most common kmer')
    new_seed = seed.Seed(
        extend_length=self.extend_length,
        overlap_length=self.overlap_length,
        reads1=seed_reads,
        ext_min_cov=self.ext_min_cov,
        ext_min_ratio=self.ext_min_ratio,
        verbose=self.verbose,
        seed_length=self.seed_length,
        seed_min_count=self.seed_min_count,
        seed_max_count=self.seed_max_count,
        kmc_threads=self.kmc_threads,
        map_threads=self.threads,
    )

    if len(new_seed) == 0:
        print('Warning: could not get most common kmer for', seed_name)
        return

    if self.verbose:
        print('Making new seed for', seed_name, ' ... extending most common kmer')
    new_seed.extend(self.reads1, self.reads2, self.seed_stop_length)

    f = pyfastaq.utils.open_file_write(tmp_prefix + '.' + seed_name + '.fa')
    try:
        # The first and last 10 characters of the extended sequence are
        # dropped before writing.
        # NOTE(review): presumably because the ends are less reliable - confirm.
        print(pyfastaq.sequences.Fasta('seed.' + seed_name, new_seed.seq[10:-10]), file=f)
    finally:
        # Always release the handle, even if building or printing the
        # record raises.
        pyfastaq.utils.close(f)

    if self.verbose:
        print('Making new seed for', seed_name, ' ... finished')
def _make_new_seed(self, seed_name):
    '''Build a new seed for seed_name and write it to a FASTA file.

    Steps:
      1. Write reads from self.bam_file for seed_name to
         <tmpdir>/out.<seed_name>.reads_1.fa. When the original seed is
         longer than self.seed_stop_length, only a window of about
         seed_stop_length centred on the middle of the original seed is
         requested; otherwise start and end are passed as None.
      2. Construct a seed.Seed from those reads.
      3. Extend it with self.reads1 and self.reads2.
      4. Write the result as record 'seed.<seed_name>' to
         <tmpdir>/out.<seed_name>.fa.

    If the constructed Seed has length zero, a warning is printed and
    nothing is written.

    Args:
        seed_name: key into self.original_seeds; also passed as the
            reference name to mapping.bam_file_to_region_fasta.

    Returns:
        None.
    '''
    if self.verbose:
        print('Making new seed for', seed_name, ' ... start')

    tmp_prefix = os.path.join(self.tmpdir, 'out')
    seed_reads = tmp_prefix + '.' + seed_name + '.reads_1.fa'
    original_length = len(self.original_seeds[seed_name])

    if original_length > self.seed_stop_length:
        # Window of roughly seed_stop_length centred on the middle of the
        # original seed.
        start = int(0.5 * original_length - 0.5 * self.seed_stop_length)
        end = int(0.5 * original_length + 0.5 * self.seed_stop_length)
    else:
        start = None
        end = None

    if self.verbose:
        print('Making new seed for', seed_name, ' ... getting reads')
    mapping.bam_file_to_region_fasta(self.bam_file, seed_reads, seed_name, start, end)

    if self.verbose:
        print('Making new seed for', seed_name, ' ... finding most common kmer')
    new_seed = seed.Seed(
        extend_length=self.extend_length,
        overlap_length=self.overlap_length,
        reads1=seed_reads,
        ext_min_cov=self.ext_min_cov,
        ext_min_ratio=self.ext_min_ratio,
        verbose=self.verbose,
        seed_length=self.seed_length,
        seed_min_count=self.seed_min_count,
        seed_max_count=self.seed_max_count,
        kmc_threads=self.kmc_threads,
        map_threads=self.threads,
    )

    if len(new_seed) == 0:
        print('Warning: could not get most common kmer for', seed_name)
        return

    if self.verbose:
        print('Making new seed for', seed_name, ' ... extending most common kmer')
    new_seed.extend(self.reads1, self.reads2, self.seed_stop_length)

    f = pyfastaq.utils.open_file_write(tmp_prefix + '.' + seed_name + '.fa')
    try:
        # The first and last 10 characters of the extended sequence are
        # dropped before writing.
        # NOTE(review): presumably because the ends are less reliable - confirm.
        print(pyfastaq.sequences.Fasta('seed.' + seed_name, new_seed.seq[10:-10]), file=f)
    finally:
        # Always release the handle, even if building or printing the
        # record raises.
        pyfastaq.utils.close(f)

    if self.verbose:
        print('Making new seed for', seed_name, ' ... finished')