Esempio n. 1
0
 def _run_minimus2(self, infile, outdir):
     '''Runs toAmos on infile, then minimus2 on the result.

     toAmos writes <outdir>/minimus2.afg; minimus2 is then given the
     prefix <outdir>/minimus2. The toAmos call uses common.syscall's
     default failure handling, while the minimus2 call is made with
     allow_fail=True and its common.syscall result is returned.'''
     prefix = os.path.join(outdir, 'minimus2')
     afg_file = prefix + '.afg'
     to_amos = ' '.join(['toAmos', '-s', infile, '-o', afg_file])
     common.syscall(to_amos)
     return common.syscall('minimus2 ' + prefix, allow_fail=True)
Esempio n. 2
0
 def _run_minimus2(self, infile, outdir):
     '''Converts infile with toAmos and merges it with minimus2.

     Both tools share the prefix <outdir>/minimus2: toAmos is asked to
     write <prefix>.afg and minimus2 is run on <prefix>. Returns the
     result of common.syscall for the minimus2 command, which is allowed
     to fail (allow_fail=True).'''
     minimus_prefix = os.path.join(outdir, 'minimus2')

     # Step 1: build the .afg file that minimus2 reads.
     common.syscall('toAmos -s ' + infile + ' -o ' + minimus_prefix + '.afg')

     # Step 2: run minimus2; the caller decides what to do on failure.
     minimus_cmd = 'minimus2 ' + minimus_prefix
     return common.syscall(minimus_cmd, allow_fail=True)
Esempio n. 3
0
    def run_spades(self, stop_at_first_success=False):
        '''Runs SPAdes once per kmer in self.spades_kmers and keeps the best run.

        Each kmer gets its own temporary directory, created in the current
        working directory. A run counts as a success when run_spades_once
        reports ok and the N50 taken from the samtools faidx index of its
        contigs.fasta is non-zero. If stop_at_first_success is True, no
        further kmers are tried after the first success.

        The directory of the kmer with the largest N50 (ties go to the
        largest kmer) is renamed to self.outdir; every other temporary
        directory is deleted. Raises Error, leaving all temporary
        directories in place, if no run was a success.'''
        n50_by_kmer = {}
        dir_by_kmer = {}

        for kmer in self.spades_kmers:
            workdir = tempfile.mkdtemp(
                prefix=''.join([self.outdir, '.tmp.spades.', str(kmer), '.']),
                dir=os.getcwd(),
            )
            dir_by_kmer[kmer] = workdir
            succeeded, _ = self.run_spades_once(kmer, workdir)
            if not succeeded:
                continue

            fasta = os.path.join(workdir, 'contigs.fasta')
            common.syscall(self.samtools.exe() + ' faidx ' + fasta,
                           verbose=self.verbose)
            n50_value = pyfastaq.tasks.stats_from_fai(fasta + '.fai')['N50']
            if n50_value == 0:
                continue

            n50_by_kmer[kmer] = n50_value
            if stop_at_first_success:
                break

        if not n50_by_kmer:
            raise Error(
                'Error running SPAdes. Output directories are:\n  ' +
                '\n  '.join(dir_by_kmer.values()) +
                '\nThe reason why should be in the spades.log file in each directory.'
            )

        if self.verbose:
            print('[assemble]\tkmer\tN50')
            for kmer in sorted(n50_by_kmer):
                print('[assemble]', kmer, n50_by_kmer[kmer], sep='\t')

        # Highest N50 wins; on equal N50 the larger kmer is preferred.
        best_kmer = max(n50_by_kmer, key=lambda kmer: (n50_by_kmer[kmer], kmer))

        for kmer, workdir in dir_by_kmer.items():
            if kmer != best_kmer:
                shutil.rmtree(workdir)
                continue

            if self.verbose:
                print('[assemble] using assembly with kmer', kmer)
            os.rename(workdir, self.outdir)
Esempio n. 4
0
def bwa_index(infile, outprefix=None, bwa='bwa', verbose=False):
    '''Runs "bwa index" on infile unless the index files already exist.

    infile    -- file to be indexed
    outprefix -- prefix of the index files; defaults to infile
    bwa       -- command used to invoke bwa
    verbose   -- passed through to common.syscall

    Nothing is run when <outprefix>.<ext> exists for every ext in
    index_extensions.'''
    prefix = infile if outprefix is None else outprefix

    if all(os.path.exists(prefix + '.' + ext) for ext in index_extensions):
        return

    index_cmd = ' '.join([bwa, 'index', '-p', prefix, infile])
    common.syscall(index_cmd, verbose=verbose)
Esempio n. 5
0
def bwa_index(infile, outprefix=None, bwa=None, verbose=False):
    '''Builds a bwa index for infile if any index file is absent.

    infile    -- file to be indexed
    outprefix -- prefix of the index files; defaults to infile
    bwa       -- program object providing exe(); when None one is made
                 with external_progs.make_and_check_prog
    verbose   -- passed through to make_and_check_prog and common.syscall

    Returns without running bwa when <outprefix>.<ext> exists for every
    ext in index_extensions.'''
    bwa_prog = bwa
    if bwa_prog is None:
        bwa_prog = external_progs.make_and_check_prog('bwa', verbose=verbose)

    prefix = infile if outprefix is None else outprefix

    index_is_incomplete = any(
        not os.path.exists(prefix + '.' + ext) for ext in index_extensions
    )
    if not index_is_incomplete:
        return

    words = [bwa_prog.exe(), 'index', '-p', prefix, infile]
    common.syscall(' '.join(words), verbose=verbose)
Esempio n. 6
0
def bwa_index(infile, outprefix=None, bwa=None, verbose=False):
    '''Indexes infile with "bwa index -p <outprefix>" when needed.

    If bwa is None a program object is obtained from
    external_progs.make_and_check_prog; its exe() gives the command to
    run. outprefix defaults to infile. The index is considered present,
    and nothing is run, when a file <outprefix>.<ext> exists for each ext
    in index_extensions.'''
    if bwa is None:
        bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

    index_prefix = outprefix
    if index_prefix is None:
        index_prefix = infile

    for extension in index_extensions:
        if not os.path.exists(index_prefix + '.' + extension):
            # At least one index file is absent, so (re)build the index.
            break
    else:
        return

    common.syscall(
        ' '.join([bwa.exe(), 'index', '-p', index_prefix, infile]),
        verbose=verbose,
    )
Esempio n. 7
0
    def run_spades(self, stop_at_first_success=False):
        '''Tries each kmer in self.spades_kmers with SPAdes; keeps the best N50.

        Every kmer is assembled by run_spades_once in a fresh temporary
        directory under the current working directory. For runs reported
        as ok, contigs.fasta is indexed with samtools faidx and its N50 is
        recorded when non-zero. With stop_at_first_success=True the loop
        ends at the first recorded N50.

        The temporary directory with the biggest N50 becomes self.outdir
        (equal N50s are resolved in favour of the biggest kmer) and the
        rest are removed. If no N50 was recorded, Error is raised and the
        temporary directories are kept for inspection.'''
        n50_of = {}
        tmpdir_of = {}

        for kmer in self.spades_kmers:
            tmp_prefix = self.outdir + '.tmp.spades.' + str(kmer) + '.'
            tmpdir_of[kmer] = tempfile.mkdtemp(prefix=tmp_prefix, dir=os.getcwd())
            ok, _ = self.run_spades_once(kmer, tmpdir_of[kmer])
            if not ok:
                continue

            contigs = os.path.join(tmpdir_of[kmer], 'contigs.fasta')
            faidx_cmd = self.samtools.exe() + ' faidx ' + contigs
            common.syscall(faidx_cmd, verbose=self.verbose)
            stats = pyfastaq.tasks.stats_from_fai(contigs + '.fai')

            if stats['N50'] == 0:
                continue

            n50_of[kmer] = stats['N50']
            if stop_at_first_success:
                break

        if not n50_of:
            tmpdir_list = '\n  '.join(tmpdir_of.values())
            raise Error('Error running SPAdes. Output directories are:\n  ' + tmpdir_list + '\nThe reason why should be in the spades.log file in each directory.')

        candidates = sorted(n50_of)

        if self.verbose:
            print('[assemble]\tkmer\tN50')
            for kmer in candidates:
                print('[assemble]', kmer, n50_of[kmer], sep='\t')

        # Walk kmers in ascending order; ">=" lets a later kmer win a tie.
        winner = candidates[0]
        for kmer in candidates[1:]:
            if n50_of[kmer] >= n50_of[winner]:
                winner = kmer

        for kmer, tmpdir in tmpdir_of.items():
            if kmer != winner:
                shutil.rmtree(tmpdir)
            else:
                if self.verbose:
                    print('[assemble] using assembly with kmer', kmer)
                os.rename(tmpdir, self.outdir)
Esempio n. 8
0
    def run_spades_once(self, kmer, outdir):
        '''Runs SPAdes with a single kmer, writing to outdir.

        The command uses self.reads as the -s input, self.threads for -t,
        and always passes --careful and --only-assembler. Returns the
        result of common.syscall, called with allow_fail=True.'''
        spades_args = [self.spades.exe()]
        spades_args += ['-s', self.reads]
        spades_args += ['-k', str(kmer)]
        spades_args += ['--careful', '--only-assembler']
        spades_args += ['-t', str(self.threads)]
        spades_args += ['-o', outdir]

        return common.syscall(
            ' '.join(spades_args),
            verbose=self.verbose,
            allow_fail=True,
        )
Esempio n. 9
0
    def run_canu(self):
        '''Runs canu in self.outdir and gives its outputs the standard names.

        After a successful run, canu.contigs.fasta is passed through
        Assembler._rename_canu_contigs to produce contigs.fasta, and
        canu.contigs.gfa is renamed to contigs.gfa. Raises Error if
        common.syscall reports that canu did not succeed.'''
        def in_outdir(filename):
            return os.path.join(self.outdir, filename)

        canu_cmd = self._make_canu_command(self.outdir, 'canu')
        succeeded, _ = common.syscall(canu_cmd,
                                      verbose=self.verbose,
                                      allow_fail=False)
        if not succeeded:
            raise Error('Error running Canu.')

        Assembler._rename_canu_contigs(in_outdir('canu.contigs.fasta'),
                                       in_outdir('contigs.fasta'))
        os.rename(in_outdir('canu.contigs.gfa'), in_outdir('contigs.gfa'))
Esempio n. 10
0
    def run_canu(self):
        '''Assembles with canu (instead of spades) into self.outdir.

        Raises Error when common.syscall does not report success.
        Otherwise the contigs in canu.contigs.fasta are written to
        contigs.fasta by Assembler._rename_canu_contigs and the graph file
        canu.contigs.gfa is renamed to contigs.gfa.'''
        ok, _ = common.syscall(
            self._make_canu_command(self.outdir, 'canu'),
            verbose=self.verbose,
            allow_fail=False,
        )
        if not ok:
            raise Error('Error running Canu.')

        # Map each final file name to the name canu gave it.
        canu_name = {
            'contigs.fasta': 'canu.contigs.fasta',
            'contigs.gfa': 'canu.contigs.gfa',
        }
        source = {
            final: os.path.join(self.outdir, original)
            for final, original in canu_name.items()
        }
        target = {final: os.path.join(self.outdir, final) for final in canu_name}

        Assembler._rename_canu_contigs(source['contigs.fasta'],
                                       target['contigs.fasta'])
        os.rename(source['contigs.gfa'], target['contigs.gfa'])
Esempio n. 11
0
    def run_canu(self):
        '''Runs canu instead of spades.

        canu is run in self.outdir with the prefix 'canu'. Its
        canu.contigs.fasta is then copied to contigs.fasta with each
        header line rewritten to the form
        NODE_<n>_length_<len>_cov_<cov>_ID_<id>, and contigs.fasta is
        indexed with samtools faidx.

        Raises Error if common.syscall reports that canu did not succeed.'''
        cmd = self._make_canu_command(self.outdir, 'canu')
        ok, errs = common.syscall(cmd, verbose=self.verbose, allow_fail=False)
        if not ok:
            raise Error('Error running Canu.')

        canu_contigs = os.path.join(self.outdir, 'canu.contigs.fasta')
        contigs_fasta = os.path.join(self.outdir, 'contigs.fasta')

        # Context managers close both files even if a header is malformed
        # and the field lookups below raise.
        with open(canu_contigs) as f_in, open(contigs_fasta, 'w') as f_out:
            for line in f_in:
                if not line.startswith('>'):
                    f_out.write(line)
                    continue

                # Header fields are whitespace separated; fields 1 and 3
                # are expected to look like key=value.
                fields = line.split()
                contig_name = fields[0]
                # NOTE(review): contig_name still carries the leading '>',
                # so the text after '_ID_' also starts with '>'. Kept as-is
                # to leave the output unchanged - confirm whether intended.
                f_out.write(
                    contig_name.replace('tig00', 'NODE_') +
                    '_length_' + fields[1].split('=')[1] +
                    '_cov_' + fields[3].split('=')[1] +
                    '_ID_' + contig_name.replace('tig00', '') + '\n'
                )

        common.syscall(self.samtools.exe() + ' faidx ' + contigs_fasta,
                       verbose=self.verbose)
Esempio n. 12
0
    def run_spades_once(self, kmer, outdir):
        '''Assembles self.reads with SPAdes using one kmer size.

        kmer   -- kmer size given to -k
        outdir -- directory given to -o

        The run is made with --careful and --only-assembler on
        self.threads threads. The command may fail (allow_fail=True); the
        result of common.syscall is returned unchanged.'''
        words = [self.spades.exe()]

        # Each group is an option followed by its value, if it has one.
        option_groups = (
            ('-s', self.reads),
            ('-k', str(kmer)),
            ('--careful',),
            ('--only-assembler',),
            ('-t', str(self.threads)),
            ('-o', outdir),
        )
        for group in option_groups:
            words.extend(group)

        spades_cmd = ' '.join(words)
        return common.syscall(spades_cmd, verbose=self.verbose, allow_fail=True)
Esempio n. 13
0
def bwa_mem(ref, reads, outfile, threads=1, bwa_options='-x pacbio',
            verbose=False, index=None):
    '''Maps reads to ref with bwa mem, making a sorted, indexed BAM file.

    ref         -- reference file; indexed with bwa_index under a
                   temporary prefix that is removed after mapping
    reads       -- reads file given to bwa mem
    outfile     -- name of the final BAM file; its last four characters
                   are stripped to form the samtools sort output prefix
    threads     -- threads for bwa mem; samtools sort uses at most 4
    bwa_options -- extra options inserted after "bwa mem"
    verbose     -- passed to the program checks and to common.syscall
    index       -- not used by this function

    The bwa output is piped through "samtools view -F 0x0800" into a
    temporary unsorted BAM, which is deleted once sorted.'''
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    unsorted_bam = outfile + '.tmp.unsorted.bam'
    tmp_index = outfile + '.tmp.bwa_index'
    bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa.exe())

    # Mapping and BAM conversion run as one shell pipeline.
    map_words = [bwa.exe(), 'mem', bwa_options, '-t', str(threads),
                 tmp_index, reads]
    view_words = [samtools.exe(), 'view', '-F 0x0800', '-T', ref, '-b',
                  '-o', unsorted_bam, '-']
    common.syscall(' '.join(map_words) + ' | ' + ' '.join(view_words),
                   verbose=verbose)
    bwa_index_clean(tmp_index)

    sort_threads = min(4, threads)
    mem_per_thread = str(int(500 / sort_threads)) + 'M'
    sort_words = [samtools.exe(), 'sort', '-@', str(sort_threads),
                  '-m', mem_per_thread, unsorted_bam, outfile[:-4]]
    common.syscall(' '.join(sort_words), verbose=verbose)
    os.unlink(unsorted_bam)

    common.syscall(samtools.exe() + ' index ' + outfile, verbose=verbose)
Esempio n. 14
0
 def run_spades_once(self, kmer, outdir):
     '''Runs the SPAdes command for one kmer and returns the syscall result.

     The command comes from self._make_spades_command(kmer, outdir) and
     is run with allow_fail=True.'''
     return common.syscall(
         self._make_spades_command(kmer, outdir),
         verbose=self.verbose,
         allow_fail=True,
     )
Esempio n. 15
0
def bwa_mem(ref, reads, outfile, threads=1, bwa_options='-x pacbio',
            verbose=False, index=None):
    '''Maps reads to ref with bwa mem, making a sorted, indexed BAM file.

    ref         -- reference file; indexed with bwa_index under a
                   temporary prefix that is removed after mapping
    reads       -- reads file given to bwa mem
    outfile     -- name of the final BAM file
    threads     -- threads for bwa mem; samtools sort uses at most 4
    bwa_options -- extra options inserted after "bwa mem"
    verbose     -- passed to the program checks and to common.syscall
    index       -- not used by this function

    The bwa output is piped through "samtools view -F 0x0800" into a
    temporary unsorted BAM, which is deleted once sorted. How the sort
    output is named depends on the samtools version (see below).'''
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    unsorted_bam = outfile + '.tmp.unsorted.bam'
    tmp_index = outfile + '.tmp.bwa_index'
    bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa)

    map_words = [bwa.exe(), 'mem', bwa_options, '-t', str(threads),
                 tmp_index, reads]
    view_words = [samtools.exe(), 'view', '-F 0x0800', '-T', ref, '-b',
                  '-o', unsorted_bam, '-']
    common.syscall(' '.join(map_words) + ' | ' + ' '.join(view_words),
                   verbose=verbose)
    bwa_index_clean(tmp_index)

    sort_threads = min(4, threads)
    mem_per_thread = str(int(500 / sort_threads)) + 'M'

    # From samtools 1.3 the sort output file is given with -o. Older
    # versions take an output prefix instead, i.e. outfile without its
    # last four characters. The empty first item in the old-style case
    # keeps the generated command string exactly as it has always been.
    if samtools.version_at_least('1.3'):
        sort_output = ['-o', outfile]
    else:
        sort_output = ['', outfile[:-4]]

    sort_words = [samtools.exe(), 'sort', '-@', str(sort_threads),
                  '-m', mem_per_thread, unsorted_bam] + sort_output
    common.syscall(' '.join(sort_words), verbose=verbose)
    os.unlink(unsorted_bam)

    common.syscall(samtools.exe() + ' index ' + outfile, verbose=verbose)
Esempio n. 16
0
 def run_spades_once(self, kmer, outdir):
     '''Builds the SPAdes command for kmer/outdir and runs it.

     Failure is tolerated (allow_fail=True); whatever common.syscall
     returns is handed back to the caller.'''
     spades_cmd = self._make_spades_command(kmer, outdir)
     syscall_options = dict(verbose=self.verbose, allow_fail=True)
     return common.syscall(spades_cmd, **syscall_options)
Esempio n. 17
0
    def run_racon(self):
        '''Runs minimap2, miniasm and two rounds of racon instead of spades.

        All files are written to self.outdir:
          1. minimap2 maps self.reads against itself -> output.paf
          2. miniasm -> output.gfa
          3. awk and fold turn the GFA "S" lines into FASTA -> output.gfa.fasta
          4. two rounds of minimap2 mapping followed by racon ->
             output.racon1.fasta, then output.racon2.fasta
        Finally output.gfa is renamed to contigs.gfa and
        output.racon2.fasta to contigs.fasta.

        The PacBio minimap2 presets (ava-pb, map-pb) are used when the part
        of self.data_type before the first '-' is 'pacbio'; otherwise the
        Nanopore presets (ava-ont, map-ont) are used, in both correction
        rounds.

        Raises Error if common.syscall reports that a step did not succeed.'''
        is_pacbio = self.data_type.split('-')[0] == 'pacbio'
        overlap_reads_type = 'ava-pb' if is_pacbio else 'ava-ont'
        map_reads_type = 'map-pb' if is_pacbio else 'map-ont'

        # str.join only accepts strings, so convert the thread count once.
        threads = str(self.threads)

        def out_path(filename):
            return os.path.join(self.outdir, filename)

        def run_step(words, error_message):
            ok, errs = common.syscall(' '.join(words),
                                      verbose=self.verbose,
                                      allow_fail=False)
            if not ok:
                raise Error(error_message)

        paf = out_path('output.paf')
        gfa = out_path('output.gfa')
        gfa_fasta = out_path('output.gfa.fasta')

        # minimap2
        run_step(
            [self.minimap2.exe(), '-t', threads, '-x', overlap_reads_type,
             self.reads, self.reads, '>', paf],
            'Error running minimap2.',
        )

        # miniasm
        run_step(
            [self.miniasm.exe(), '-Rc2', '-f', self.reads, paf, '>', gfa],
            'Error running miniasm.',
        )

        # gfa2fasta. The awk program is wrapped in single quotes so the
        # shell passes it through untouched, and the raw string keeps \n as
        # backslash-n for awk to interpret rather than a real newline.
        awk_program = r"""'/^S/{print ">"$2"\n"$3}'"""
        run_step(
            [self.awk.exe(), awk_program, gfa, '|', 'fold', '>', gfa_fasta],
            'Error running awk.',
        )

        # Two correction rounds; each one polishes the previous round's output.
        draft = gfa_fasta
        for round_number in (1, 2):
            sam = out_path('output.gfa{}.sam'.format(round_number))
            polished = out_path('output.racon{}.fasta'.format(round_number))

            run_step(
                [self.minimap2.exe(), '-t', threads, '-ax', map_reads_type,
                 draft, self.reads, '>', sam],
                'Error running minimap2 correction step #{}.'.format(round_number),
            )
            run_step(
                [self.racon.exe(), '-t', threads, self.reads, sam, draft,
                 '>', polished],
                'Error running racon correction step #{}.'.format(round_number),
            )
            draft = polished

        os.rename(gfa, out_path('contigs.gfa'))
        os.rename(draft, out_path('contigs.fasta'))
Esempio n. 18
0
def bwa_mem(ref,
            reads,
            outfile,
            threads=1,
            bwa_options='-x pacbio',
            verbose=False,
            index=None):
    '''Runs bwa mem and produces a coordinate-sorted, indexed BAM.

    A temporary bwa index of ref is built (and cleaned up afterwards),
    reads are mapped with bwa_options on the given number of threads, and
    the output is piped to "samtools view -F 0x0800" to make a temporary
    unsorted BAM. That file is sorted with samtools sort, using no more
    than 4 threads, then deleted, and outfile is indexed with samtools
    index. The index parameter is not used.'''
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    unsorted_bam = outfile + '.tmp.unsorted.bam'
    tmp_index = outfile + '.tmp.bwa_index'
    bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa)

    pipeline = []
    pipeline += [bwa.exe(), 'mem', bwa_options]
    pipeline += ['-t', str(threads)]
    pipeline += [tmp_index, reads]
    pipeline += ['|']
    pipeline += [samtools.exe(), 'view', '-F 0x0800']
    pipeline += ['-T', ref]
    pipeline += ['-b', '-o', unsorted_bam, '-']

    common.syscall(' '.join(pipeline), verbose=verbose)
    bwa_index_clean(tmp_index)
    threads = min(4, threads)
    thread_mem = int(500 / threads)

    # samtools >= 1.3 names the sort output file with -o. Earlier
    # versions expect an output prefix as the last argument instead, so
    # the last four characters of outfile are dropped and no flag is
    # given (the flag slot stays an empty string).
    new_style_sort = samtools.version_at_least('1.3')
    outparam = '-o' if new_style_sort else ''
    samout = outfile if new_style_sort else outfile[:-4]

    sort_words = [samtools.exe(), 'sort']
    sort_words += ['-@', str(threads)]
    sort_words += ['-m', str(thread_mem) + 'M']
    sort_words += [unsorted_bam, outparam, samout]

    common.syscall(' '.join(sort_words), verbose=verbose)
    os.unlink(unsorted_bam)

    index_cmd = ' '.join([samtools.exe(), 'index', outfile])
    common.syscall(index_cmd, verbose=verbose)