예제 #1
0
 def test_bam_file_to_fasta_pair_files_region(self):
     '''Test bam_file_to_fasta_pair_files with a region'''
     tmp1 = 'tmp.to_fasta_1.fa'
     tmp2 = 'tmp.to_fasta_2.fa'
     bam = os.path.join(data_dir, 'mapping_test.smalt.out.sorted.bam')
     try:
         mapping.bam_file_to_fasta_pair_files(bam, tmp1, tmp2, chromosome='ref', start=25, end=150)
         # shallow=False makes filecmp compare file contents instead of
         # accepting two files whose os.stat() signatures happen to match.
         self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'mapping_test.bam_to_region_1.fa'), tmp1, shallow=False))
         self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'mapping_test.bam_to_region_2.fa'), tmp2, shallow=False))
     finally:
         # Clean up even when an assertion fails, so stale output files
         # cannot leak into later test runs. Skip files never created so
         # the original failure is not masked by FileNotFoundError.
         for tmp in (tmp1, tmp2):
             if os.path.exists(tmp):
                 os.unlink(tmp)
예제 #2
0
 def test_bam_file_to_fasta_pair_files(self):
     '''Test bam_file_to_fasta_pair_files'''
     tmp1 = 'tmp.to_fasta_1.fa'
     tmp2 = 'tmp.to_fasta_2.fa'
     bam = os.path.join(data_dir, 'mapping_test.smalt.out.bam')
     try:
         mapping.bam_file_to_fasta_pair_files(bam, tmp1, tmp2)
         # shallow=False makes filecmp compare file contents instead of
         # accepting two files whose os.stat() signatures happen to match.
         self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'mapping_test.reads_1.fasta'), tmp1, shallow=False))
         self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'mapping_test.reads_2.fasta'), tmp2, shallow=False))
     finally:
         # Clean up even when an assertion fails, so stale output files
         # cannot leak into later test runs. Skip files never created so
         # the original failure is not masked by FileNotFoundError.
         for tmp in (tmp1, tmp2):
             if os.path.exists(tmp):
                 os.unlink(tmp)
예제 #3
0
    def _trim_strand_biased_ends(self,
                                 reads_prefix,
                                 out_prefix=None,
                                 tag_as_trimmed=False,
                                 break_contigs=False):
        tmpdir = tempfile.mkdtemp(prefix='tmp.trim_strand_biased_ends.',
                                  dir=os.getcwd())
        tmp_prefix = os.path.join(tmpdir, 'out')
        sorted_bam = tmp_prefix + '.bam'
        unsorted_bam = tmp_prefix + '.unsorted.bam'
        original_map_minid = self.map_minid
        self.map_minid = 0.9
        self._map_reads(reads_prefix + '_1.fa',
                        reads_prefix + '_2.fa',
                        tmp_prefix,
                        sort_reads=True)
        assert os.path.exists(sorted_bam)
        self.map_minid = original_map_minid
        new_contigs = []
        contigs_to_remove = set()
        for ctg in self.contigs:
            if break_contigs:
                subcontigs = self._subcontigs_from_strand_bias(sorted_bam, ctg)
                if len(subcontigs):
                    new_contigs.extend(subcontigs)
                    contigs_to_remove.add(ctg)
            elif ctg not in self.contigs_trimmed_for_strand_bias:
                self._trim_contig_for_strand_bias(sorted_bam, ctg)
                # contig could get completely trimmed so nothing left, in which
                # case, we need to remove it
                if len(self.contigs[ctg]) == 0:
                    contigs_to_remove.add(ctg)
                elif tag_as_trimmed:
                    self.contigs_trimmed_for_strand_bias.add(ctg)

        for ctg in contigs_to_remove:
            self._remove_contig(ctg)

        for ctg in new_contigs:
            self._add_contig(ctg, min_length=0.75 * self.self.seed_stop_length)

        if out_prefix is not None:
            mapping.bam_file_to_fasta_pair_files(unsorted_bam,
                                                 out_prefix + '_1.fa',
                                                 out_prefix + '_2.fa',
                                                 remove_proper_pairs=True)
        shutil.rmtree(tmpdir)
예제 #4
0
파일: assembly.py 프로젝트: andrewjpage/iva
    def _trim_strand_biased_ends(self, reads_prefix, out_prefix=None, tag_as_trimmed=False, break_contigs=False):
        tmpdir = tempfile.mkdtemp(prefix='tmp.trim_strand_biased_ends.', dir=os.getcwd())
        tmp_prefix = os.path.join(tmpdir, 'out')
        sorted_bam = tmp_prefix + '.bam'
        unsorted_bam = tmp_prefix + '.unsorted.bam'
        original_map_minid = self.map_minid
        self.map_minid = 0.9
        self._map_reads(reads_prefix + '_1.fa', reads_prefix + '_2.fa', tmp_prefix, sort_reads=True)
        assert os.path.exists(sorted_bam)
        self.map_minid = original_map_minid
        new_contigs = []
        contigs_to_remove = set()
        for ctg in self.contigs:
            if break_contigs:
                subcontigs = self._subcontigs_from_strand_bias(sorted_bam, ctg)
                if len(subcontigs):
                    new_contigs.extend(subcontigs)
                    contigs_to_remove.add(ctg)
            elif ctg not in self.contigs_trimmed_for_strand_bias:
                self._trim_contig_for_strand_bias(sorted_bam, ctg)
                # contig could get completely trimmed so nothing left, in which
                # case, we need to remove it
                if len(self.contigs[ctg]) == 0:
                    contigs_to_remove.add(ctg)
                elif tag_as_trimmed:
                    self.contigs_trimmed_for_strand_bias.add(ctg)

        for ctg in contigs_to_remove:
            self._remove_contig(ctg)

        for ctg in new_contigs:
            self._add_contig(ctg, min_length=0.75 * self.self.seed_stop_length)


        if out_prefix is not None:
            mapping.bam_file_to_fasta_pair_files(unsorted_bam, out_prefix + '_1.fa', out_prefix + '_2.fa', remove_proper_pairs=True)
        shutil.rmtree(tmpdir)
예제 #5
0
 def _get_unmapped_pairs(self, reads1, reads2, out_prefix):
     '''Map reads1/reads2 with required_flag=12 and dump the result as FASTA.

     Writes <out_prefix>_1.fa and <out_prefix>_2.fa from <out_prefix>.bam,
     then deletes the BAM file.
     '''
     # presumably flag 12 = SAM bits 4 + 8 (read and mate unmapped) -
     # TODO confirm against _map_reads
     self._map_reads(reads1, reads2, out_prefix, required_flag=12)
     bam = out_prefix + '.bam'
     fasta_1 = out_prefix + '_1.fa'
     fasta_2 = out_prefix + '_2.fa'
     mapping.bam_file_to_fasta_pair_files(bam, fasta_1, fasta_2)
     os.unlink(bam)
예제 #6
0
    def _read_pair_extension_iterations(self,
                                        reads_prefix,
                                        out_prefix,
                                        no_map_contigs=None):
        if no_map_contigs is None:
            no_map_contigs = set()
        assert (len(self.contigs) > len(no_map_contigs))
        if self.verbose:
            print('{:-^79}'.format(' ' + out_prefix +
                                   ' start extension subiteration 0001 '),
                  flush=True)

        bases_added = self._extend_with_reads(reads_prefix, out_prefix + '.1',
                                              no_map_contigs)
        current_reads_prefix = reads_prefix

        if bases_added == 0:
            return True
        try_contig_trim = False
        i = 1

        while self._worth_extending() or try_contig_trim:
            i += 1
            if self.verbose:
                print('{:-^79}'.format(' ' + out_prefix +
                                       ' start extension subiteration ' +
                                       str(i).zfill(4) + ' '),
                      flush=True)

            if i % 5 == 0:
                tmpdir = tempfile.mkdtemp(prefix='tmp.filter_reads.',
                                          dir=os.getcwd())
                tmp_prefix = os.path.join(tmpdir, 'out')
                bam = tmp_prefix + '.bam'
                original_map_minid = self.map_minid
                self.map_minid = 0.9
                self._map_reads(current_reads_prefix + '_1.fa',
                                current_reads_prefix + '_2.fa', tmp_prefix)
                self.map_minid = original_map_minid
                filter_prefix = reads_prefix + '.subiter.' + str(i) + '.reads'
                mapping.bam_file_to_fasta_pair_files(bam,
                                                     filter_prefix + '_1.fa',
                                                     filter_prefix + '_2.fa',
                                                     remove_proper_pairs=True)
                if current_reads_prefix != reads_prefix:
                    os.unlink(current_reads_prefix + '_1.fa')
                    os.unlink(current_reads_prefix + '_2.fa')
                current_reads_prefix = filter_prefix
                shutil.rmtree(tmpdir)

            iter_prefix = out_prefix + '.' + str(i)
            bases_added = self._extend_with_reads(current_reads_prefix,
                                                  iter_prefix, no_map_contigs)

            if bases_added == 0:
                if not try_contig_trim:
                    if self.verbose:
                        print('    No bases added. Try trimming contigs')
                    self._trim_strand_biased_ends(reads_prefix,
                                                  tag_as_trimmed=False)
                    if len(self.contigs) <= len(no_map_contigs):
                        if self.verbose:
                            print(
                                '       lost contigs during trimming. No more iterations'
                            )
                        return False
                    self.trim_contigs(self.contig_iter_trim)
                    try_contig_trim = True
                else:
                    if self.verbose:
                        print(
                            '    No bases added after trimming. No more iterations'
                        )
                    break
            else:
                try_contig_trim = False

        if current_reads_prefix != reads_prefix:
            os.unlink(current_reads_prefix + '_1.fa')
            os.unlink(current_reads_prefix + '_2.fa')
        return True
예제 #7
0
파일: assembly.py 프로젝트: andrewjpage/iva
 def _get_unmapped_pairs(self, reads1, reads2, out_prefix):
     '''Map reads1/reads2 with required_flag=12 and dump the result as FASTA.

     Writes <out_prefix>_1.fa and <out_prefix>_2.fa from <out_prefix>.bam,
     then deletes the BAM file.
     '''
     # presumably flag 12 = SAM bits 4 + 8 (read and mate unmapped) -
     # TODO confirm against _map_reads
     self._map_reads(reads1, reads2, out_prefix, required_flag=12)
     bam = out_prefix + '.bam'
     fasta_1 = out_prefix + '_1.fa'
     fasta_2 = out_prefix + '_2.fa'
     mapping.bam_file_to_fasta_pair_files(bam, fasta_1, fasta_2)
     os.unlink(bam)
예제 #8
0
파일: assembly.py 프로젝트: andrewjpage/iva
    def _read_pair_extension_iterations(self, reads_prefix, out_prefix, no_map_contigs=None):
        if no_map_contigs is None:
            no_map_contigs = set()
        assert(len(self.contigs) > len(no_map_contigs))
        if self.verbose:
            print('{:-^79}'.format(' ' + out_prefix + ' start extension subiteration 0001 '), flush=True)

        bases_added = self._extend_with_reads(reads_prefix, out_prefix + '.1', no_map_contigs)
        current_reads_prefix = reads_prefix

        if bases_added == 0:
            return True
        try_contig_trim = False
        i = 1

        while self._worth_extending() or try_contig_trim:
            i += 1
            if self.verbose:
                print('{:-^79}'.format(' ' + out_prefix + ' start extension subiteration ' + str(i).zfill(4) + ' '), flush=True)

            if i % 5 == 0:
                tmpdir = tempfile.mkdtemp(prefix='tmp.filter_reads.', dir=os.getcwd())
                tmp_prefix = os.path.join(tmpdir, 'out')
                bam = tmp_prefix + '.bam'
                original_map_minid = self.map_minid
                self.map_minid = 0.9
                self._map_reads(current_reads_prefix + '_1.fa', current_reads_prefix + '_2.fa', tmp_prefix)
                self.map_minid = original_map_minid
                filter_prefix = reads_prefix + '.subiter.' + str(i) + '.reads'
                mapping.bam_file_to_fasta_pair_files(bam, filter_prefix + '_1.fa', filter_prefix + '_2.fa', remove_proper_pairs=True)
                if current_reads_prefix != reads_prefix:
                    os.unlink(current_reads_prefix + '_1.fa')
                    os.unlink(current_reads_prefix + '_2.fa')
                current_reads_prefix = filter_prefix
                shutil.rmtree(tmpdir)

            iter_prefix = out_prefix + '.' + str(i)
            bases_added = self._extend_with_reads(current_reads_prefix, iter_prefix, no_map_contigs)

            if bases_added == 0:
                if not try_contig_trim:
                    if self.verbose:
                        print('    No bases added. Try trimming contigs')
                    self._trim_strand_biased_ends(reads_prefix, tag_as_trimmed=False)
                    if len(self.contigs) <= len(no_map_contigs):
                        if self.verbose:
                            print('       lost contigs during trimming. No more iterations')
                        return False
                    self.trim_contigs(self.contig_iter_trim)
                    try_contig_trim = True
                else:
                    if self.verbose:
                        print('    No bases added after trimming. No more iterations')
                    break
            else:
                try_contig_trim = False

        if current_reads_prefix != reads_prefix:
            os.unlink(current_reads_prefix + '_1.fa')
            os.unlink(current_reads_prefix + '_2.fa')
        return True