Esempio n. 1
0
    def align(self, inBam, refFasta, outBam, min_qual=0, nodisk=True, JVMmemory=None, **kwargs):
        ''' Align the reads of inBam against refFasta with bbmap.sh and write a
            coordinate-sorted, indexed BAM to outBam.

            The reads are first dumped to temporary FASTQ files (bam2fq_tmp) and
            handed to bbmap.sh through self.execute, together with nodisk and any
            extra **kwargs.
            If min_qual is non-zero, the bbmap output is filtered with
            "samtools view -q min_qual" before sorting.
            Picard SortSam does the final sort and index; JVMmemory is passed to
            Picard only (it is not forwarded to bbmap.sh).
        '''
        # One SamtoolsTool instance serves both the FASTQ dump and the optional filter.
        samtools = tools.samtools.SamtoolsTool()

        with samtools.bam2fq_tmp(inBam) as (in1, in2), \
             util.file.tmp_dir('_bbmap_align') as t_dir:
            tmp_bam = os.path.join(t_dir, 'bbmap_out.bam')
            self.execute(tool='bbmap.sh', in1=in1, in2=in2, ref=refFasta, out=tmp_bam, nodisk=nodisk, **kwargs)

            # Samtools filter (optional)
            if min_qual:
                # Keep the filtered file in the same scratch directory as the raw output.
                tmp_bam2 = os.path.join(t_dir, 'bbmap.filtered.bam')
                cmd = [samtools.install_and_get_path(), 'view', '-b', '-S', '-1', '-q', str(min_qual), tmp_bam]
                _log.debug('%s > %s', ' '.join(cmd), tmp_bam2)
                with open(tmp_bam2, 'wb') as outf:
                    util.misc.run_and_save(cmd, outf=outf)
                # The unfiltered alignment is no longer needed once the filtered copy exists.
                os.unlink(tmp_bam)
                tmp_bam = tmp_bam2

            # Picard SortSam
            sorter = tools.picard.SortSamTool()
            sorter.execute(
                tmp_bam,
                outBam,
                sort_order='coordinate',
                picardOptions=['CREATE_INDEX=true', 'VALIDATION_STRINGENCY=SILENT'],
                JVMmemory=JVMmemory
            )
Esempio n. 2
0
    def align_one_rg_bam(self, inBam, refFasta, outBam, rgid=None, rgs=None, options=None, min_qual=0, JVMmemory=None):
        ''' Execute Novoalign on BAM inputs and outputs.
            Requires that only one RG exists (will error otherwise), unless rgid
            names the single read group of inBam that should be aligned.
            Use Picard to sort and index the output BAM.
            If min_qual>0, use Samtools to filter on mapping quality.

            rgs may be supplied to avoid re-reading the read groups from inBam;
            options defaults to ["-r", "Random"].

            Raises InvalidBamHeaderError if inBam has no read groups, has several
            read groups and no rgid was given, or does not contain rgid.

            Returns None. If inBam has several read groups and none of its reads
            belong to rgid, returns early without writing outBam.

            Intermediate files created here are removed before returning, also
            when one of the external tools raises.
        '''
        options = options or ["-r", "Random"]

        samtools = tools.samtools.SamtoolsTool()

        # Require exactly one RG
        rgs = rgs if rgs is not None else samtools.getReadGroups(inBam)
        if len(rgs) == 0:
            raise InvalidBamHeaderError("{} lacks read groups".format(inBam))
        elif len(rgs) == 1:
            if not rgid:
                rgid = list(rgs.keys())[0]
        elif not rgid:
            raise InvalidBamHeaderError("{} has {} read groups, but we require exactly one".format(inBam, len(rgs)))
        if rgid not in rgs:
            raise InvalidBamHeaderError("{} has read groups, but not {}".format(inBam, rgid))

        # Every intermediate file is registered here; whatever is still on disk
        # when we leave (normally, early, or via an exception) is deleted below.
        # inBam itself is never registered, so the caller's input is left alone.
        scratch = []
        try:
            # Strip inBam to just one RG (if necessary)
            if len(rgs) == 1:
                one_rg_inBam = inBam
            else:
                # strip inBam to one read group
                tmp_bam = util.file.mkstempfname('.onebam.bam')
                scratch.append(tmp_bam)
                samtools.view(['-b', '-r', rgid], inBam, tmp_bam)
                # special exit if this file is empty
                if samtools.count(tmp_bam) == 0:
                    return

                # simplify BAM header otherwise Novoalign gets confused
                one_rg_inBam = util.file.mkstempfname('.{}.in.bam'.format(rgid))
                scratch.append(one_rg_inBam)
                headerFile = util.file.mkstempfname('.{}.header.txt'.format(rgid))
                scratch.append(headerFile)
                with open(headerFile, 'wt') as outf:
                    for row in samtools.getHeader(inBam):
                        if len(row) > 0 and row[0] == '@RG':
                            # An @RG line without an ID: field cannot be the requested
                            # read group, so it is skipped like any other non-matching one.
                            row_rgid = next((x[3:] for x in row if x.startswith('ID:')), None)
                            if row_rgid != rgid:
                                # skip all read groups that are not rgid
                                continue
                        outf.write('\t'.join(row) + '\n')
                samtools.reheader(tmp_bam, headerFile, one_rg_inBam)
                # Release these two right away rather than holding them through the alignment.
                os.unlink(tmp_bam)
                os.unlink(headerFile)

            # Novoalign
            tmp_sam = util.file.mkstempfname('.novoalign.sam')
            scratch.append(tmp_sam)
            cmd = [self.install_and_get_path(), '-f', one_rg_inBam] + list(map(str, options))
            cmd = cmd + ['-F', 'BAM', '-d', self._fasta_to_idx_name(refFasta), '-o', 'SAM']
            _log.debug(' '.join(cmd))
            with open(tmp_sam, 'wt') as outf:
                util.misc.run_and_save(cmd, outf=outf)

            # Samtools filter (optional)
            if min_qual:
                tmp_bam2 = util.file.mkstempfname('.filtered.bam')
                scratch.append(tmp_bam2)
                cmd = [samtools.install_and_get_path(), 'view', '-b', '-S', '-1', '-q', str(min_qual), tmp_sam]
                _log.debug('%s > %s', ' '.join(cmd), tmp_bam2)
                with open(tmp_bam2, 'wb') as outf:
                    util.misc.run_and_save(cmd, outf=outf)
                os.unlink(tmp_sam)
                tmp_sam = tmp_bam2

            # Picard SortSam
            sorter = tools.picard.SortSamTool()
            sorter.execute(
                tmp_sam,
                outBam,
                sort_order='coordinate',
                picardOptions=['CREATE_INDEX=true', 'VALIDATION_STRINGENCY=SILENT'],
                JVMmemory=JVMmemory
            )
        finally:
            # Some entries were already unlinked above, hence the existence check.
            for path in scratch:
                if os.path.exists(path):
                    os.unlink(path)