Exemple #1
0
 def test_bam_file_to_region_fasta(self):
     '''Test bam_file_to_region_fasta.

     Runs mapping.bam_file_to_region_fasta on the test BAM for reference
     'ref' with start=42 and end=142, and checks that the file it writes
     is identical to the expected FASTA stored in data_dir.
     '''
     tmp = 'tmp.to_fasta.fa'
     bam = os.path.join(data_dir, 'mapping_test.smalt.out.sorted.bam')
     expected = os.path.join(data_dir, 'mapping_test.bam_to_region.fasta')
     try:
         mapping.bam_file_to_region_fasta(bam, tmp, 'ref', start=42, end=142)
         # shallow=False forces a comparison of the file contents instead
         # of accepting the files as equal on matching os.stat signatures.
         self.assertTrue(filecmp.cmp(expected, tmp, shallow=False))
     finally:
         # Clean up even when the call or the assertion fails, so a failed
         # run does not leave the output file in the working directory.
         if os.path.exists(tmp):
             os.unlink(tmp)
Exemple #2
0
    def _make_new_seed(self, seed_name):
        '''Make a new seed for seed_name and write it to a FASTA file.

        Steps:
          1. Call mapping.bam_file_to_region_fasta on self.bam_file to write
             <tmpdir>/out.<seed_name>.reads_1.fa. When the original seed is
             longer than self.seed_stop_length, the call is restricted to a
             window of seed_stop_length centred on the middle of the
             original seed; otherwise start and end are passed as None.
          2. Build a seed.Seed from that reads file.
          3. If the new seed has length zero, print a warning and return
             without writing anything.
          4. Otherwise extend it using self.reads1 and self.reads2, and
             write it to <tmpdir>/out.<seed_name>.fa under the name
             'seed.<seed_name>'.

        Args:
            seed_name: key into self.original_seeds; also used in the
                temporary file names and in the output sequence name.

        Returns:
            None.
        '''
        if self.verbose:
            print('Making new seed for', seed_name, ' ... start')
        tmp_prefix = os.path.join(self.tmpdir, 'out')
        seed_reads = tmp_prefix + '.' + seed_name + '.reads_1.fa'

        original_length = len(self.original_seeds[seed_name])
        if original_length > self.seed_stop_length:
            # Window of seed_stop_length centred on the middle of the
            # original seed.
            start = int(0.5 * original_length - 0.5 * self.seed_stop_length)
            end = int(0.5 * original_length + 0.5 * self.seed_stop_length)
        else:
            start = None
            end = None

        if self.verbose:
            print('Making new seed for', seed_name, ' ... getting reads')
        mapping.bam_file_to_region_fasta(self.bam_file, seed_reads, seed_name,
                                         start, end)
        if self.verbose:
            print('Making new seed for', seed_name,
                  ' ... finding most common kmer')
        new_seed = seed.Seed(extend_length=self.extend_length,
                             overlap_length=self.overlap_length,
                             reads1=seed_reads,
                             ext_min_cov=self.ext_min_cov,
                             ext_min_ratio=self.ext_min_ratio,
                             verbose=self.verbose,
                             seed_length=self.seed_length,
                             seed_min_count=self.seed_min_count,
                             seed_max_count=self.seed_max_count,
                             kmc_threads=self.kmc_threads,
                             map_threads=self.threads)
        if len(new_seed) == 0:
            # This warning is printed regardless of self.verbose.
            print('Warning: could not get most common kmer for', seed_name)
            return

        if self.verbose:
            print('Making new seed for', seed_name,
                  ' ... extending most common kmer')

        new_seed.extend(self.reads1, self.reads2, self.seed_stop_length)
        f = pyfastaq.utils.open_file_write(tmp_prefix + '.' + seed_name +
                                           '.fa')
        try:
            # Ten characters are dropped from each end of the extended
            # sequence. NOTE(review): presumably because the ends are less
            # reliable -- confirm against Seed.extend.
            print(pyfastaq.sequences.Fasta('seed.' + seed_name,
                                           new_seed.seq[10:-10]),
                  file=f)
        finally:
            # Close the handle even if building or writing the record fails.
            pyfastaq.utils.close(f)
        if self.verbose:
            print('Making new seed for', seed_name, ' ... finished')
Exemple #3
0
    def _make_new_seed(self, seed_name):
        '''Replace the seed called seed_name with a newly built one.

        Writes two files under self.tmpdir:
          out.<seed_name>.reads_1.fa  output of
                                      mapping.bam_file_to_region_fasta for
                                      this seed, used as reads1 of the new
                                      seed.Seed
          out.<seed_name>.fa          the extended new seed, named
                                      'seed.<seed_name>' (only written when
                                      the new seed is non-empty)

        If the original seed is longer than self.seed_stop_length, only the
        central seed_stop_length positions of it are passed as the region;
        otherwise start and end are None.

        Args:
            seed_name: key of the seed in self.original_seeds.

        Returns:
            None. Prints a warning and returns early when the new seed has
            length zero.
        '''
        verbose = self.verbose
        if verbose:
            print('Making new seed for', seed_name, ' ... start')

        tmp_prefix = os.path.join(self.tmpdir, 'out')
        seed_reads = tmp_prefix + '.' + seed_name + '.reads_1.fa'
        seed_fasta = tmp_prefix + '.' + seed_name + '.fa'

        start = None
        end = None
        seed_len = len(self.original_seeds[seed_name])
        if seed_len > self.seed_stop_length:
            # Central window of width seed_stop_length.
            start = int(0.5 * seed_len - 0.5 * self.seed_stop_length)
            end = int(0.5 * seed_len + 0.5 * self.seed_stop_length)

        if verbose:
            print('Making new seed for', seed_name, ' ... getting reads')
        mapping.bam_file_to_region_fasta(self.bam_file, seed_reads, seed_name, start, end)

        if verbose:
            print('Making new seed for', seed_name, ' ... finding most common kmer')
        new_seed = seed.Seed(
            extend_length=self.extend_length,
            overlap_length=self.overlap_length,
            reads1=seed_reads,
            ext_min_cov=self.ext_min_cov,
            ext_min_ratio=self.ext_min_ratio,
            verbose=self.verbose,
            seed_length=self.seed_length,
            seed_min_count=self.seed_min_count,
            seed_max_count=self.seed_max_count,
            kmc_threads=self.kmc_threads,
            map_threads=self.threads,
        )
        if len(new_seed) == 0:
            # Printed whether or not verbose is set.
            print('Warning: could not get most common kmer for', seed_name)
            return

        if verbose:
            print('Making new seed for', seed_name, ' ... extending most common kmer')
        new_seed.extend(self.reads1, self.reads2, self.seed_stop_length)

        f = pyfastaq.utils.open_file_write(seed_fasta)
        try:
            # The first and last ten characters of the sequence are not
            # written. NOTE(review): reason not visible here -- confirm.
            print(pyfastaq.sequences.Fasta('seed.' + seed_name, new_seed.seq[10:-10]), file=f)
        finally:
            # Make sure the output handle is released if the write raises.
            pyfastaq.utils.close(f)

        if verbose:
            print('Making new seed for', seed_name, ' ... finished')