def __init__(
    self,
    reads,
    outdir,
    threads=1,
    spades_kmers=None,
    careful=True,
    only_assembler=True,
    verbose=False,
    spades_use_first_success=False,
    assembler='spades',
    genomeSize=100000,  # only matters for Canu if correcting reads (which we're not)
    data_type='pacbio-corrected',
):
    """Store the assembly settings and look up the external programs.

    Args:
        reads: Path to the reads file; stored as an absolute path and must
            already exist.
        outdir: Output directory; stored as an absolute path.
        threads: Stored on the instance as ``self.threads``.
        spades_kmers: Passed to ``self._build_spades_kmers`` (SPAdes only).
        careful: Stored on the instance (SPAdes only).
        only_assembler: Stored on the instance (SPAdes only).
        verbose: Stored on the instance and forwarded to the program lookups.
        spades_use_first_success: Stored on the instance (SPAdes only).
        assembler: Either ``'spades'`` or ``'canu'``.
        genomeSize: Stored on the instance (Canu only).
        data_type: Stored on the instance (Canu only).

    Raises:
        Error: If the reads file does not exist, or ``assembler`` is not one
            of the supported names.
    """
    self.outdir = os.path.abspath(outdir)
    self.reads = os.path.abspath(reads)
    reads_missing = not os.path.exists(self.reads)
    if reads_missing:
        raise Error('Reads file not found:' + self.reads)

    find_prog = external_progs.make_and_check_prog
    self.verbose = verbose
    self.samtools = find_prog('samtools', verbose=self.verbose)
    self.threads = threads
    self.assembler = assembler

    # Reject unsupported assemblers before doing any assembler-specific setup.
    if self.assembler not in ('spades', 'canu'):
        raise Error('Unknown assembler: "' + self.assembler + '". cannot continue')

    if self.assembler == 'canu':
        self.canu = find_prog('canu', verbose=self.verbose, required=True)
        self.genomeSize = genomeSize
        self.data_type = data_type
        return

    # Remaining case: SPAdes.
    self.spades = find_prog('spades', verbose=self.verbose, required=True)
    self.spades_kmers = self._build_spades_kmers(spades_kmers)
    self.spades_use_first_success = spades_use_first_success
    self.careful = careful
    self.only_assembler = only_assembler
def bwa_mem(
    ref,
    reads,
    outfile,
    threads=1,
    bwa_options='-x pacbio',
    verbose=False,
    index=None,
):
    """Map reads to a reference with ``bwa mem`` and write a sorted, indexed BAM.

    A temporary BWA index is built next to ``outfile``, the reads are mapped
    and piped through ``samtools view`` into a temporary unsorted BAM, and
    that BAM is then sorted and indexed. The temporary index and unsorted BAM
    are removed along the way.

    Args:
        ref: Reference sequence file; used for indexing and passed to
            ``samtools view -T``.
        reads: Reads file passed to ``bwa mem``.
        outfile: Name of the final sorted BAM. Its last four characters are
            stripped to form the ``samtools sort`` output prefix, so it is
            presumably expected to end in ``.bam``.
        threads: Thread count for ``bwa mem``; the sort step uses at most 4.
        bwa_options: Extra options string inserted into the ``bwa mem`` call.
        verbose: Forwarded to the program lookups and every system call.
        index: Accepted for interface compatibility; not used by this function.
    """
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    unsorted_bam = outfile + '.tmp.unsorted.bam'
    tmp_index = outfile + '.tmp.bwa_index'

    # Pass the program object itself: bwa_index() calls bwa.exe() on this
    # argument, so handing it the already-resolved string would fail.
    bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa)

    # '-F 0x0800' filters out records with the 0x800 (supplementary) flag set.
    cmd = ' '.join([
        bwa.exe(), 'mem',
        bwa_options,
        '-t', str(threads),
        tmp_index,
        reads,
        '|',
        samtools.exe(), 'view',
        '-F 0x0800',
        '-T', ref,
        '-b',
        '-o', unsorted_bam,
        '-',
    ])
    common.syscall(cmd, verbose=verbose)
    bwa_index_clean(tmp_index)

    # Cap the sort at 4 threads and split a 500M budget between them.
    threads = min(4, threads)
    thread_mem = int(500 / threads)

    # The last positional argument is an output prefix, hence the
    # four-character suffix being dropped from outfile.
    cmd = ' '.join([
        samtools.exe(), 'sort',
        '-@', str(threads),
        '-m', str(thread_mem) + 'M',
        unsorted_bam,
        outfile[:-4],
    ])
    common.syscall(cmd, verbose=verbose)
    os.unlink(unsorted_bam)

    cmd = samtools.exe() + ' index ' + outfile
    common.syscall(cmd, verbose=verbose)
def __init__(
    self,
    reads,
    outdir,
    threads=1,
    spades_kmers=None,
    careful=True,
    only_assembler=True,
    verbose=False,
    spades_use_first_success=False,
    assembler='spades',
    genomeSize=100000,  # only matters for Canu if correcting reads (which we're not)
    data_type='pacbio-raw',
):
    """Store the assembly settings and look up the external programs.

    Args:
        reads: Path to the reads file; stored as an absolute path and must
            already exist.
        outdir: Output directory; stored as an absolute path.
        threads: Stored on the instance as ``self.threads``.
        spades_kmers: Passed to ``self._build_spades_kmers`` (SPAdes only).
        careful: Stored on the instance (SPAdes only).
        only_assembler: Stored on the instance (SPAdes only).
        verbose: Stored on the instance and forwarded to the program lookups.
        spades_use_first_success: Stored on the instance (SPAdes only).
        assembler: One of ``'spades'``, ``'canu'`` or ``'racon'``.
        genomeSize: Stored on the instance (Canu only).
        data_type: Stored on the instance (Canu and Racon).

    Raises:
        Error: If the reads file does not exist, or ``assembler`` is not one
            of the supported names.
    """
    self.outdir = os.path.abspath(outdir)
    self.reads = os.path.abspath(reads)
    if os.path.exists(self.reads) is False:
        raise Error('Reads file not found:' + self.reads)

    self.verbose = verbose
    locate = external_progs.make_and_check_prog
    self.samtools = locate('samtools', verbose=self.verbose)
    self.threads = threads
    self.assembler = assembler

    # Reject unsupported assemblers before doing any assembler-specific setup.
    if self.assembler not in ('spades', 'canu', 'racon'):
        raise Error('Unknown assembler: "' + self.assembler + '". cannot continue')

    if self.assembler == 'racon':
        self.racon = locate('racon', verbose=self.verbose, required=True)
        self.data_type = data_type
    elif self.assembler == 'canu':
        self.canu = locate('canu', verbose=self.verbose, required=True)
        self.genomeSize = genomeSize
        self.data_type = data_type
    else:
        # Remaining case: SPAdes.
        self.spades = locate('spades', verbose=self.verbose, required=True)
        self.spades_kmers = self._build_spades_kmers(spades_kmers)
        self.spades_use_first_success = spades_use_first_success
        self.careful = careful
        self.only_assembler = only_assembler
def __init__(
    self,
    reads,
    outdir,
    threads=1,
    CanuError=0.045,
    spades_kmers=None,
    careful=True,
    only_assembler=True,
    verbose=False,
    spades_use_first_success=False,
    useCanu=False,
    genomeSize=100000,
    dataType='pacbio-raw',
):
    """Store the assembly settings and look up the external programs.

    The assembler is chosen by ``useCanu``: a falsy value configures SPAdes,
    a truthy value configures Canu.

    Args:
        reads: Path to the reads file; stored as an absolute path and must
            already exist.
        outdir: Output directory; stored as an absolute path.
        threads: Stored on the instance as ``self.threads``.
        CanuError: Stored on the instance (Canu only).
        spades_kmers: Passed to ``self._build_spades_kmers`` (SPAdes only).
        careful: Stored on the instance (SPAdes only).
        only_assembler: Stored on the instance (SPAdes only).
        verbose: Stored on the instance and forwarded to the program lookups.
        spades_use_first_success: Stored on the instance (SPAdes only).
        useCanu: Selects Canu when truthy, SPAdes otherwise.
        genomeSize: Stored on the instance (Canu only).
        dataType: Stored on the instance (Canu only).

    Raises:
        Error: If the reads file does not exist.
    """
    self.outdir = os.path.abspath(outdir)
    self.reads = os.path.abspath(reads)
    if not os.path.exists(self.reads):
        raise Error('Reads file not found:' + self.reads)

    self.verbose = verbose
    self.samtools = external_progs.make_and_check_prog(
        'samtools', verbose=self.verbose)

    # Set for both assemblers so the attribute always exists; previously it
    # was only assigned on the SPAdes path.
    self.threads = threads

    if useCanu:
        self.CanuError = CanuError
        self.useCanu = True
        self.assembler = 'canu'
        self.canu = external_progs.make_and_check_prog(
            'canu', verbose=self.verbose)
        self.genomeSize = genomeSize
        self.dataType = dataType
    else:
        self.useCanu = False
        self.assembler = 'spades'
        self.spades = external_progs.make_and_check_prog(
            'spades', verbose=self.verbose)
        self.spades_kmers = self._build_spades_kmers(spades_kmers)
        self.spades_use_first_success = spades_use_first_success
        self.careful = careful
        self.only_assembler = only_assembler
def __init__(
    self,
    reads,
    outdir,
    threads=1,
    spades_kmers=None,
    verbose=False,
    spades_use_first_success=False,
):
    """Store the SPAdes assembly settings and look up the external programs.

    Args:
        reads: Path to the reads file; stored as an absolute path and must
            already exist.
        outdir: Output directory; stored as an absolute path.
        threads: Stored on the instance as ``self.threads``.
        spades_kmers: Passed to ``self._build_spades_kmers``.
        verbose: Stored on the instance and forwarded to the program lookups.
        spades_use_first_success: Stored on the instance.

    Raises:
        Error: If the reads file does not exist.
    """
    self.outdir = os.path.abspath(outdir)
    self.reads = os.path.abspath(reads)
    reads_found = os.path.exists(self.reads)
    if not reads_found:
        raise Error('Reads file not found:' + self.reads)

    self.verbose = verbose
    self.threads = threads

    find_prog = external_progs.make_and_check_prog
    self.spades = find_prog('spades', verbose=self.verbose)
    self.spades_kmers = self._build_spades_kmers(spades_kmers)
    self.spades_use_first_success = spades_use_first_success
    self.samtools = find_prog('samtools', verbose=self.verbose)

    # This variant only ever configures SPAdes.
    self.assembler = 'spades'
def __init__(
    self,
    reads,
    outdir,
    threads=1,
    spades_kmers=None,
    verbose=False,
    spades_use_first_success=False,
):
    """Set up a SPAdes-based assembly run.

    Args:
        reads: Path to the reads file; stored as an absolute path and must
            already exist.
        outdir: Output directory; stored as an absolute path.
        threads: Stored on the instance as ``self.threads``.
        spades_kmers: Passed to ``self._build_spades_kmers``.
        verbose: Stored on the instance and forwarded to the program lookups.
        spades_use_first_success: Stored on the instance.

    Raises:
        Error: If the reads file does not exist.
    """
    self.outdir, self.reads = os.path.abspath(outdir), os.path.abspath(reads)
    if os.path.exists(self.reads) is False:
        raise Error('Reads file not found:' + self.reads)

    self.verbose, self.threads = verbose, threads

    # Program lookups happen in the same order as before: spades, then samtools.
    self.spades = external_progs.make_and_check_prog(
        'spades',
        verbose=self.verbose,
    )
    self.spades_kmers = self._build_spades_kmers(spades_kmers)
    self.spades_use_first_success = spades_use_first_success
    self.samtools = external_progs.make_and_check_prog(
        'samtools',
        verbose=self.verbose,
    )
    self.assembler = 'spades'
def bwa_index(infile, outprefix=None, bwa=None, verbose=False):
    """Run ``bwa index`` on ``infile`` unless the index files already exist.

    Args:
        infile: Sequence file to index.
        outprefix: Prefix for the index files; defaults to ``infile``.
        bwa: Program object exposing ``exe()``; looked up via
            ``external_progs.make_and_check_prog`` when omitted.
        verbose: Forwarded to the program lookup and the system call.
    """
    if bwa is None:
        bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

    prefix = infile if outprefix is None else outprefix

    # Nothing to do when every expected index file is already on disk.
    if all(os.path.exists(prefix + '.' + ext) for ext in index_extensions):
        return

    index_cmd = ' '.join([bwa.exe(), 'index', '-p', prefix, infile])
    common.syscall(index_cmd, verbose=verbose)
def bwa_index(infile, outprefix=None, bwa=None, verbose=False):
    """Build a BWA index for ``infile`` if any index file is missing.

    Args:
        infile: Sequence file to index.
        outprefix: Prefix for the index files; defaults to ``infile``.
        bwa: Program object exposing ``exe()``; looked up via
            ``external_progs.make_and_check_prog`` when omitted.
        verbose: Forwarded to the program lookup and the system call.
    """
    if bwa is None:
        bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

    if outprefix is None:
        outprefix = infile

    # Scan for the first missing index file; the loop's else branch runs only
    # when none is missing, in which case indexing is skipped.
    for extension in index_extensions:
        expected_file = outprefix + '.' + extension
        if not os.path.exists(expected_file):
            break
    else:
        return

    command_parts = [bwa.exe(), 'index', '-p', outprefix, infile]
    common.syscall(' '.join(command_parts), verbose=verbose)
def bwa_mem(
    ref,
    reads,
    outfile,
    threads=1,
    bwa_options='-x pacbio',
    verbose=False,
    index=None,
):
    """Map reads to a reference with ``bwa mem`` and write a sorted, indexed BAM.

    A temporary BWA index is built next to ``outfile``, the reads are mapped
    and piped through ``samtools view`` into a temporary unsorted BAM, and
    that BAM is then sorted and indexed. The temporary index and unsorted BAM
    are removed along the way.

    Args:
        ref: Reference sequence file; used for indexing and passed to
            ``samtools view -T``.
        reads: Reads file passed to ``bwa mem``.
        outfile: Name of the final sorted BAM.
        threads: Thread count for ``bwa mem``; the sort step uses at most 4.
        bwa_options: Extra options string inserted into the ``bwa mem`` call.
        verbose: Forwarded to the program lookups and every system call.
        index: Not used by this function.
    """
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

    raw_bam = outfile + '.tmp.unsorted.bam'
    index_prefix = outfile + '.tmp.bwa_index'
    bwa_index(ref, outprefix=index_prefix, verbose=verbose, bwa=bwa)

    # Mapping stage, piped straight into BAM conversion.
    map_args = [
        bwa.exe(), 'mem',
        bwa_options,
        '-t', str(threads),
        index_prefix,
        reads,
    ]
    view_args = [
        samtools.exe(), 'view',
        '-F 0x0800',
        '-T', ref,
        '-b',
        '-o', raw_bam,
        '-',
    ]
    common.syscall(' '.join(map_args + ['|'] + view_args), verbose=verbose)
    bwa_index_clean(index_prefix)

    # Cap the sort at 4 threads and split a 500M budget between them.
    sort_threads = min(4, threads)
    mem_per_thread = int(500 / sort_threads)

    # The samtools version decides how the sort output is named: from 1.3 on
    # the output file is given with -o. Version 1.2 also accepts -o, but not
    # as "-o out.bam" at the end of the call, so the new style is only used
    # from 1.3 onwards; older versions get an output prefix instead.
    if samtools.version_at_least('1.3'):
        output_args = ['-o', outfile]
    else:
        output_args = ['', outfile[:-4]]

    sort_args = [
        samtools.exe(), 'sort',
        '-@', str(sort_threads),
        '-m', str(mem_per_thread) + 'M',
        raw_bam,
    ]
    common.syscall(' '.join(sort_args + output_args), verbose=verbose)
    os.unlink(raw_bam)

    common.syscall(samtools.exe() + ' index ' + outfile, verbose=verbose)
def bwa_mem(ref, reads, outfile, threads=1, bwa_options='-x pacbio',
            verbose=False, index=None):
    """Align ``reads`` against ``ref`` and produce a sorted, indexed BAM file.

    Steps, in order: build a temporary BWA index, run ``bwa mem`` piped into
    ``samtools view`` to get an unsorted BAM, remove the temporary index, sort
    the BAM into ``outfile``, delete the unsorted BAM, and index ``outfile``.

    Args:
        ref: Reference sequence file; used for indexing and passed to
            ``samtools view -T``.
        reads: Reads file passed to ``bwa mem``.
        outfile: Name of the final sorted BAM.
        threads: Thread count for ``bwa mem``; the sort step uses at most 4.
        bwa_options: Extra options string inserted into the ``bwa mem`` call.
        verbose: Forwarded to the program lookups and every system call.
        index: Not used by this function.
    """
    def run(parts):
        # Join the pieces with single spaces and hand them to the shell helper.
        common.syscall(' '.join(parts), verbose=verbose)

    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    tmp_bam = outfile + '.tmp.unsorted.bam'
    tmp_prefix = outfile + '.tmp.bwa_index'
    bwa_index(ref, outprefix=tmp_prefix, verbose=verbose, bwa=bwa)

    run([
        bwa.exe(), 'mem', bwa_options, '-t', str(threads), tmp_prefix, reads,
        '|',
        samtools.exe(), 'view', '-F 0x0800', '-T', ref, '-b', '-o', tmp_bam, '-',
    ])
    bwa_index_clean(tmp_prefix)

    # At most 4 sort threads, sharing a total of 500M.
    n_sort_threads = min(4, threads)
    per_thread_mb = int(500 / n_sort_threads)

    # From samtools 1.3 the sort output file is named with -o. Version 1.2
    # accepts -o too, but not as a trailing "-o out.bam", so only 1.3+ uses
    # the new style; earlier versions are given an output prefix.
    out_flag, out_name = (
        ('-o', outfile)
        if samtools.version_at_least('1.3')
        else ('', outfile[:-4])
    )

    run([
        samtools.exe(), 'sort',
        '-@', str(n_sort_threads),
        '-m', '{}M'.format(per_thread_mb),
        tmp_bam,
        out_flag, out_name,
    ])
    os.unlink(tmp_bam)

    run([samtools.exe(), 'index', outfile])