Example #1
0
    def __init__(
        self,
        reads,
        outdir,
        threads=1,
        spades_kmers=None,
        careful=True,
        only_assembler=True,
        verbose=False,
        spades_use_first_success=False,
        assembler='spades',
        genomeSize=100000,  # only matters for Canu if correcting reads (which we're not)
        data_type='pacbio-corrected',
    ):
        """Record the run settings and obtain the external programs needed.

        `reads` and `outdir` are stored as absolute paths. samtools is always
        obtained through external_progs.make_and_check_prog; the chosen
        assembler ('spades' or 'canu') is obtained with required=True, and
        only the options belonging to that assembler are stored on self.

        Raises Error when the reads file does not exist or when `assembler`
        is neither 'spades' nor 'canu'.
        """
        self.outdir = os.path.abspath(outdir)
        self.reads = os.path.abspath(reads)
        reads_found = os.path.exists(self.reads)
        if not reads_found:
            raise Error('Reads file not found:' + self.reads)

        self.verbose = verbose
        self.samtools = external_progs.make_and_check_prog(
            'samtools',
            verbose=self.verbose,
        )
        self.threads = threads
        self.assembler = assembler

        # Reject unsupported assemblers first so that the code below only has
        # to distinguish between the two known names.
        if self.assembler not in ('spades', 'canu'):
            raise Error('Unknown assembler: "' + self.assembler + '". cannot continue')

        if self.assembler == 'canu':
            self.canu = external_progs.make_and_check_prog(
                'canu',
                verbose=self.verbose,
                required=True,
            )
            self.genomeSize = genomeSize
            self.data_type = data_type
            return

        # Remaining case: SPAdes.
        self.spades = external_progs.make_and_check_prog(
            'spades',
            verbose=self.verbose,
            required=True,
        )
        self.spades_kmers = self._build_spades_kmers(spades_kmers)
        self.spades_use_first_success = spades_use_first_success
        self.careful = careful
        self.only_assembler = only_assembler
Example #2
0
def bwa_mem(
      ref,
      reads,
      outfile,
      threads=1,
      bwa_options = '-x pacbio',
      verbose=False,
      index=None
    ):
    """Map reads to ref with `bwa mem` and write a sorted, indexed BAM.

    Steps, each run through common.syscall:
      1. build a temporary BWA index at `outfile + '.tmp.bwa_index'`;
      2. pipe `bwa mem` into `samtools view -F 0x0800 -b` to make an
         unsorted BAM at `outfile + '.tmp.unsorted.bam'`;
      3. remove the temporary index, sort the BAM, delete the unsorted BAM;
      4. run `samtools index` on `outfile`.

    Args:
        ref: path of the reference sequence file.
        reads: path of the reads file.
        outfile: path of the final BAM. The sort step is given
            `outfile[:-4]`, i.e. outfile minus its last four characters, so
            a name ending in '.bam' is assumed.
            NOTE(review): this looks like the output-prefix form of
            `samtools sort` used before samtools 1.3 - confirm against the
            samtools version in use.
        threads: thread count for bwa; the sort uses at most 4 of them.
        bwa_options: extra options inserted verbatim after `bwa mem`.
        verbose: passed on to the program lookups and to common.syscall.
        index: not used by this function.
    """
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    unsorted_bam = outfile + '.tmp.unsorted.bam'
    tmp_index = outfile + '.tmp.bwa_index'

    # Hand over the program object, not the string from bwa.exe():
    # bwa_index calls bwa.exe() itself when it builds its command line.
    bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa)

    cmd = ' '.join([
        bwa.exe(), 'mem',
        bwa_options,
        '-t', str(threads),
        tmp_index,
        reads,
        '|',
        samtools.exe(), 'view',
        '-F 0x0800',
        '-T', ref,
        '-b',
        '-o', unsorted_bam,
        '-',
    ])

    common.syscall(cmd, verbose=verbose)
    bwa_index_clean(tmp_index)

    # Sorting is capped at 4 threads; the '-m' value is 500 divided by the
    # thread count, in megabytes.
    threads = min(4, threads)
    thread_mem = int(500 / threads)

    cmd = ' '.join([
        samtools.exe(), 'sort',
        '-@', str(threads),
        '-m', str(thread_mem) + 'M',
        unsorted_bam,
        outfile[:-4]
    ])

    common.syscall(cmd, verbose=verbose)
    os.unlink(unsorted_bam)

    cmd = samtools.exe() + ' index ' + outfile
    common.syscall(cmd, verbose=verbose)
Example #3
0
    def __init__(
        self,
        reads,
        outdir,
        threads=1,
        spades_kmers=None,
        careful=True,
        only_assembler=True,
        verbose=False,
        spades_use_first_success=False,
        assembler='spades',
        genomeSize=100000,  # only matters for Canu if correcting reads (which we're not)
        data_type='pacbio-raw',
    ):
        """Record the run settings and obtain the external programs needed.

        `reads` and `outdir` are stored as absolute paths. samtools is always
        obtained through external_progs.make_and_check_prog. The assembler
        program ('spades', 'canu' or 'racon') is obtained with required=True
        and stored on self under its own name, followed by the options that
        belong to that assembler:

          * spades: spades_kmers, spades_use_first_success, careful,
            only_assembler
          * canu:   genomeSize, data_type
          * racon:  data_type

        Raises Error when the reads file does not exist or when `assembler`
        is not one of the three supported names.
        """
        self.outdir = os.path.abspath(outdir)
        self.reads = os.path.abspath(reads)
        if not os.path.exists(self.reads):
            raise Error('Reads file not found:' + self.reads)

        self.verbose = verbose
        self.samtools = external_progs.make_and_check_prog(
            'samtools', verbose=self.verbose)
        self.threads = threads
        self.assembler = assembler

        # Find which supported name (if any) the requested assembler equals.
        supported = ('spades', 'canu', 'racon')
        chosen = next(
            (name for name in supported if self.assembler == name), None)
        if chosen is None:
            raise Error('Unknown assembler: "' + self.assembler +
                        '". cannot continue')

        # For every supported assembler the program name and the attribute
        # it is stored under are the same string.
        program = external_progs.make_and_check_prog(
            chosen, verbose=self.verbose, required=True)
        setattr(self, chosen, program)

        if chosen == 'spades':
            self.spades_kmers = self._build_spades_kmers(spades_kmers)
            self.spades_use_first_success = spades_use_first_success
            self.careful = careful
            self.only_assembler = only_assembler
        else:
            if chosen == 'canu':
                self.genomeSize = genomeSize
            self.data_type = data_type
Example #4
0
    def __init__(
        self,
        reads,
        outdir,
        threads=1,
        CanuError=0.045,
        spades_kmers=None,
        careful=True,
        only_assembler=True,
        verbose=False,
        spades_use_first_success=False,
        useCanu=False,
        genomeSize=100000,
        dataType='pacbio-raw',
    ):
        """Record the run settings and obtain the external programs needed.

        `reads` and `outdir` are stored as absolute paths and samtools is
        always obtained through external_progs.make_and_check_prog.

        Args:
            reads: path of the reads file; must exist.
            outdir: output directory path.
            threads: stored as self.threads for both assemblers.
            CanuError: stored as self.CanuError when useCanu is true.
            spades_kmers: passed to self._build_spades_kmers (SPAdes only).
            careful: stored as self.careful (SPAdes only).
            only_assembler: stored as self.only_assembler (SPAdes only).
            verbose: passed on to the external program lookups.
            spades_use_first_success: stored on self (SPAdes only).
            useCanu: when true, Canu is selected; otherwise SPAdes.
            genomeSize: stored as self.genomeSize (Canu only).
            dataType: stored as self.dataType (Canu only).

        Raises:
            Error: if the reads file does not exist.
        """
        self.outdir = os.path.abspath(outdir)
        self.reads = os.path.abspath(reads)
        if not os.path.exists(self.reads):
            raise Error('Reads file not found:' + self.reads)

        self.verbose = verbose
        self.samtools = external_progs.make_and_check_prog(
            'samtools', verbose=self.verbose)

        # Set for both assemblers. This used to be assigned on the SPAdes
        # path only, which left Canu instances without a `threads` attribute.
        self.threads = threads
        self.useCanu = bool(useCanu)

        if self.useCanu:
            self.CanuError = CanuError
            self.assembler = 'canu'
            self.canu = external_progs.make_and_check_prog(
                'canu', verbose=self.verbose)
            self.genomeSize = genomeSize
            self.dataType = dataType
        else:
            self.assembler = 'spades'
            self.spades = external_progs.make_and_check_prog(
                'spades', verbose=self.verbose)
            self.spades_kmers = self._build_spades_kmers(spades_kmers)
            self.spades_use_first_success = spades_use_first_success
            self.careful = careful
            self.only_assembler = only_assembler
Example #5
0
    def __init__(
        self,
        reads,
        outdir,
        threads=1,
        spades_kmers=None,
        verbose=False,
        spades_use_first_success=False,
    ):
        """Record the settings for a SPAdes run and obtain spades and samtools.

        `reads` and `outdir` are stored as absolute paths. Both programs are
        obtained through external_progs.make_and_check_prog, and the k-mer
        setting is whatever self._build_spades_kmers returns for
        `spades_kmers`.

        Raises Error if the reads file does not exist.
        """
        self.outdir = os.path.abspath(outdir)
        reads_path = os.path.abspath(reads)
        self.reads = reads_path
        if not os.path.exists(reads_path):
            raise Error('Reads file not found:' + reads_path)

        # Plain settings first ...
        self.assembler = 'spades'
        self.verbose = verbose
        self.threads = threads
        self.spades_use_first_success = spades_use_first_success

        # ... then the external programs, spades before samtools.
        self.spades = external_progs.make_and_check_prog(
            'spades', verbose=self.verbose)
        self.spades_kmers = self._build_spades_kmers(spades_kmers)
        self.samtools = external_progs.make_and_check_prog(
            'samtools', verbose=self.verbose)
Example #6
0
    def __init__(self, reads, outdir, threads=1, spades_kmers=None,
                 verbose=False, spades_use_first_success=False):
        """Prepare a SPAdes assembly of `reads` into `outdir`.

        Stores absolute versions of both paths, keeps the run options on
        self, and obtains the spades and samtools programs through
        external_progs.make_and_check_prog.

        Raises Error if the reads file does not exist.
        """
        self.outdir = os.path.abspath(outdir)
        self.reads = os.path.abspath(reads)
        reads_exist = os.path.exists(self.reads)
        if not reads_exist:
            raise Error('Reads file not found:' + self.reads)

        self.verbose = verbose
        self.threads = threads
        self.assembler = 'spades'

        find_prog = external_progs.make_and_check_prog
        self.spades = find_prog('spades', verbose=self.verbose)
        self.spades_kmers = self._build_spades_kmers(spades_kmers)
        self.spades_use_first_success = spades_use_first_success
        self.samtools = find_prog('samtools', verbose=self.verbose)
Example #7
0
def bwa_index(infile, outprefix=None, bwa=None, verbose=False):
    """Run `bwa index` on infile unless every index file already exists.

    The index is written with prefix `outprefix` (defaults to `infile`).
    Nothing is run when `<prefix>.<ext>` exists for every ext in
    index_extensions. `bwa` is the program object whose exe() gives the
    executable; it is looked up when not supplied.
    """
    if bwa is None:
        bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

    prefix = infile if outprefix is None else outprefix

    # Index already complete: nothing to do.
    if all(os.path.exists(prefix + '.' + ext) for ext in index_extensions):
        return

    index_args = [bwa.exe(), 'index', '-p', prefix, infile]
    common.syscall(' '.join(index_args), verbose=verbose)
Example #8
0
def bwa_index(infile, outprefix=None, bwa=None, verbose=False):
    """Build a BWA index for infile if any of its index files is missing.

    `outprefix` is the prefix of the index files and defaults to `infile`.
    `bwa` is the program object providing exe(); when None it is obtained
    via external_progs.make_and_check_prog. Returns without running
    anything if all files `<outprefix>.<ext>` (ext in index_extensions)
    are already present.
    """
    if bwa is None:
        bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

    if outprefix is None:
        outprefix = infile

    needs_indexing = any(
        not os.path.exists(outprefix + '.' + suffix)
        for suffix in index_extensions
    )
    if needs_indexing:
        command = ' '.join([bwa.exe(), 'index', '-p', outprefix, infile])
        common.syscall(command, verbose=verbose)
Example #9
0
def bwa_mem(
    ref,
    reads,
    outfile,
    threads=1,
    bwa_options='-x pacbio',
    verbose=False,
    index=None,
):
    """Map reads to ref with `bwa mem`, producing a sorted and indexed BAM.

    A temporary BWA index and a temporary unsorted BAM are created next to
    `outfile` and removed again. `bwa_options` is inserted verbatim after
    `bwa mem`. `index` is not used by this function. All commands are run
    through common.syscall.
    """
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    unsorted_bam = outfile + '.tmp.unsorted.bam'
    tmp_index = outfile + '.tmp.bwa_index'
    bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa)

    # Stage 1: bwa mem piped straight into samtools view to get a BAM.
    map_args = [
        bwa.exe(), 'mem',
        bwa_options,
        '-t', str(threads),
        tmp_index,
        reads,
    ]
    view_args = [
        samtools.exe(), 'view',
        '-F 0x0800',
        '-T', ref,
        '-b',
        '-o', unsorted_bam,
        '-',
    ]
    common.syscall(' '.join(map_args + ['|'] + view_args), verbose=verbose)
    bwa_index_clean(tmp_index)

    # Stage 2: sort, with at most 4 threads and the '-m' value set to
    # 500 divided by the thread count (in megabytes).
    sort_threads = min(4, threads)
    mem_per_thread = int(500 / sort_threads)

    # From samtools 1.3 the sort output file is given with -o. Version 1.2
    # accepts -o but not `-o out.bam` at the end of the call, so the new
    # style is only used from 1.3 onwards; older versions get the output
    # name without its last four characters instead.
    if samtools.version_at_least('1.3'):
        sort_output = ['-o', outfile]
    else:
        # The empty first element keeps the command text exactly as before.
        sort_output = ['', outfile[:-4]]

    sort_args = [
        samtools.exe(), 'sort',
        '-@', str(sort_threads),
        '-m', str(mem_per_thread) + 'M',
        unsorted_bam,
    ]
    common.syscall(' '.join(sort_args + sort_output), verbose=verbose)
    os.unlink(unsorted_bam)

    # Stage 3: index the sorted BAM.
    common.syscall(' '.join([samtools.exe(), 'index', outfile]), verbose=verbose)
Example #10
0
def bwa_mem(ref,
            reads,
            outfile,
            threads=1,
            bwa_options='-x pacbio',
            verbose=False,
            index=None):
    """Align reads against ref using bwa mem; write sorted BAM plus index.

    Intermediate files (`outfile + '.tmp.bwa_index'` and
    `outfile + '.tmp.unsorted.bam'`) are cleaned up before returning.
    `bwa_options` goes verbatim after `bwa mem`; `index` is not used.
    Every command is executed with common.syscall.
    """
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    raw_bam = outfile + '.tmp.unsorted.bam'
    index_prefix = outfile + '.tmp.bwa_index'
    bwa_index(ref, outprefix=index_prefix, verbose=verbose, bwa=bwa)

    align_cmd = ' '.join(
        [bwa.exe(), 'mem', bwa_options, '-t', str(threads), index_prefix, reads]
        + ['|']
        + [samtools.exe(), 'view', '-F 0x0800', '-T', ref, '-b', '-o', raw_bam, '-']
    )
    common.syscall(align_cmd, verbose=verbose)
    bwa_index_clean(index_prefix)

    # The sort step never uses more than 4 threads.
    threads = min(4, threads)
    thread_mem = int(500 / threads)

    # samtools >= 1.3 takes the sort output file via -o. 1.2 knows -o too,
    # but not as a trailing `-o out.bam`, so older versions are given the
    # output name minus its last four characters and no flag.
    use_dash_o = samtools.version_at_least('1.3')
    outparam = '-o' if use_dash_o else ''
    samout = outfile if use_dash_o else outfile[:-4]

    sort_cmd = ' '.join([
        samtools.exe(),
        'sort',
        '-@',
        str(threads),
        '-m',
        str(thread_mem) + 'M',
        raw_bam,
        outparam,
        samout,
    ])
    common.syscall(sort_cmd, verbose=verbose)
    os.unlink(raw_bam)

    index_cmd = samtools.exe() + ' index ' + outfile
    common.syscall(index_cmd, verbose=verbose)