def repair_reads(forward_in,
                 forward_out,
                 returncmd=False,
                 reverse_in='NA',
                 reverse_out='NA'):
    """
    Wrapper for repair.sh, run on a pair of read files. The command is not run if forward_out already exists.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists.
    :param reverse_out: Reverse output reads. If left as 'NA' and the reverse input was found automatically,
    it is derived from forward_out by replacing _R1 with _R2.
    :return: out and err from running repair.sh (empty strings if the command was skipped).
    :raises ValueError: If the reverse input or the reverse output reads cannot be determined.
    """
    mate_in = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_in) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_in
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
    else:
        # Without this check the command would be built with the placeholder, i.e. in2=NA.
        if reverse_in == 'NA':
            raise ValueError('Reverse input reads must be specified.')
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
    cmd = 'repair.sh in1={} in2={} out1={} out2={} tossbrokenreads=t repair=t overwrite=t'\
        .format(forward_in, reverse_in, forward_out, reverse_out)
    # Existing output is treated as already done, so the command is skipped.
    if os.path.isfile(forward_out):
        out, err = str(), str()
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
def kmercountexact(forward_in, reverse_in='NA', returncmd=False, **kwargs):
    """
    Builds and runs a kmercountexact.sh command.
    :param forward_in: Forward input reads.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only forward_in is passed.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Arguments to give to kmercountexact in parameter='argument' format.
    See kmercountexact documentation for full list.
    :return: out and err from running kmercountexact, plus cmd if returncmd is set.
    """
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
    # After the auto-detection above, 'NA' means no reverse file is available.
    if reverse_in == 'NA':
        cmd = 'kmercountexact.sh in={} {}'.format(forward_in, extra_args)
    else:
        cmd = 'kmercountexact.sh in={} in2={} {}'.format(
            forward_in, reverse_in, extra_args)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
def seal(reference,
         forward_in,
         output_file,
         reverse_in='NA',
         returncmd=False,
         **kwargs):
    """
    Builds and runs a seal.sh command (bbtools); the nodisk flag is always included.
    :param reference: Reference file, passed as ref=.
    :param forward_in: Forward reads, passed as in=.
    :param output_file: Output file, passed as rpkm=.
    :param reverse_in: Reverse reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only forward_in is passed.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Arguments to give to seal in parameter=argument format. See seal documentation for full list.
    :return: out and err from running seal, plus cmd if returncmd is set.
    """
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
    # After the auto-detection above, 'NA' means no reverse file is available.
    if reverse_in == 'NA':
        cmd = 'seal.sh ref={} in={} rpkm={} nodisk{}'.format(
            reference, forward_in, output_file, extra_args)
    else:
        cmd = 'seal.sh ref={} in={} in2={} rpkm={} nodisk{}'.format(
            reference, forward_in, reverse_in, output_file, extra_args)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
예제 #4
0
def screen(*args,
           output_file='screen.tab',
           threads=1,
           returncmd=False,
           **kwargs):
    """
    Builds and runs a 'mash screen' command whose output is piped through 'sort -gr' into output_file.
    Requires mash v2.0 or higher.
    :param args: Files you want to screen, given in the order mash expects. First argument must be a sketch.
    :param output_file: File the sorted output is redirected to.
    :param threads: Value passed to mash's -p option.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter=''
    :return: out and err from running the command, plus cmd if returncmd is set.
    """
    extra_args = kwargs_to_string(kwargs)
    # Every positional argument is followed by a single space, exactly as typed on the command line.
    file_part = ''.join(path + ' ' for path in args)
    tail = ' -p {} {} | sort -gr > {}'.format(str(threads), extra_args,
                                              output_file)
    cmd = 'mash screen ' + file_part + tail
    out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
def bbmerge(forward_in,
            merged_reads,
            returncmd=False,
            reverse_in='NA',
            **kwargs):
    """
    Builds a bbmerge.sh command and runs it unless merged_reads already exists.
    :param forward_in: Forward input reads.
    :param merged_reads: Output file, passed as out=.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only forward_in is passed.
    :param kwargs: Other arguments to give to bbmerge in parameter='argument' format. See bbmerge documentation for full list.
    :return: out and err from running bbmerge (empty strings if skipped), plus cmd if returncmd is set.
    """
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
    # After the auto-detection above, 'NA' means no reverse file is available.
    if reverse_in == 'NA':
        cmd = 'bbmerge.sh in={} out={} {}'.format(forward_in, merged_reads,
                                                  extra_args)
    else:
        cmd = 'bbmerge.sh in={} in2={} out={} {}'.format(
            forward_in, reverse_in, merged_reads, extra_args)
    # An existing output file means the merge is not re-run.
    if os.path.isfile(merged_reads):
        out, err = str(), str()
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
def bbmap(reference,
          forward_in,
          out_bam,
          reverse_in='NA',
          returncmd=False,
          **kwargs):
    """
    Builds and runs a bbmap.sh command; the nodisk flag is always included.
    Assumes that the bbmap executable is in your $PATH.
    :param reference: Reference file, passed as ref=.
    :param forward_in: Input reads, passed as in=.
    :param out_bam: Output file, passed as out=. Should end in .sam or .bam
    :param reverse_in: Reverse reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only forward_in is passed.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Other arguments to give to bbmap in parameter=argument format. See bbmap documentation for full list.
    :return: out and err from running bbmap, plus cmd if returncmd is set.
    """
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
    # After the auto-detection above, 'NA' means no reverse file is available.
    if reverse_in == 'NA':
        cmd = 'bbmap.sh ref={} in={} out={} nodisk{}'.format(
            reference, forward_in, out_bam, extra_args)
    else:
        cmd = 'bbmap.sh ref={} in={} in2={} out={} nodisk{}'.format(
            reference, forward_in, reverse_in, out_bam, extra_args)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
예제 #7
0
def dist(*args,
         output_file='distances.tab',
         threads=1,
         returncmd=False,
         **kwargs):
    """
    Wrapper for mash dist. Output of the command is redirected to output_file.
    :param args: Files you want to find distances between, given in the order mash expects.
    At least one must be supplied.
    :param output_file: Output file to write your distances to. Default distances.tab
    :param threads: Value passed to mash's -p option.
    :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter=''
    :param returncmd: If true, will return the command used to call mash as well as out and err.
    :return: out and err from running mash dist, plus cmd if returncmd is set.
    :raises ValueError: If no files are given.
    """
    # Reject an empty call before any other work is done.
    if not args:
        raise ValueError(
            'At least one file to find distances for must be specified. You specified 0 files.'
        )
    options = kwargs_to_string(kwargs)
    # Every positional argument is followed by a single space, exactly as typed on the command line.
    cmd = 'mash dist ' + ''.join(path + ' ' for path in args)
    cmd += ' -p {} {} > {}'.format(str(threads), options, output_file)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
예제 #8
0
def sketch(*args,
           output_sketch='sketch.msh',
           threads=1,
           returncmd=False,
           **kwargs):
    """
    Builds and runs a 'mash sketch' command.
    :param args: Files you want to sketch. Any number can be passed in, file patterns (i.e. *fasta) can be used.
    :param output_sketch: Value passed to mash's -o option. Default sketch.msh.
    :param threads: Value passed to mash's -p option.
    :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter=''
    :param returncmd: If true, will return the command used to call mash as well as out and err.
    :return: out and err from running mash sketch, plus cmd if returncmd is set.
    :raises ValueError: If no files are given.
    """
    extra_args = kwargs_to_string(kwargs)
    if not args:
        raise ValueError(
            'At least one file to sketch must be specified. You specified 0 files.'
        )
    # Every positional argument is followed by a single space, exactly as typed on the command line.
    file_part = ''.join(path + ' ' for path in args)
    tail = '-o {} -p {} {}'.format(output_sketch, str(threads), extra_args)
    cmd = 'mash sketch ' + file_part + tail
    out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
def reformat_reads(forward_in,
                   forward_out,
                   returncmd=False,
                   reverse_in='NA',
                   reverse_out='NA',
                   **kwargs):
    """
    Builds a reformat.sh command (with tossbrokenreads=t ow=t) and runs it unless forward_out already exists.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; if none is found (or None is given) only the forward reads are processed.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; required when reverse_in is given explicitly.
    :param kwargs: Other arguments to give to reformat in parameter='argument' format.
    :return: out and err from running reformat (empty strings if skipped), plus cmd if returncmd is set.
    :raises ValueError: If the reverse output reads are needed but cannot be determined.
    """
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        single_end = False
    elif reverse_in == 'NA' or reverse_in is None:
        single_end = True
    else:
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
        single_end = False
    if single_end:
        cmd = 'reformat.sh in={forward_in} out={forward_out} tossbrokenreads=t ow=t{options}'\
            .format(forward_in=forward_in,
                    forward_out=forward_out,
                    options=extra_args)
    else:
        cmd = 'reformat.sh in1={forward_in} in2={reverse_in} out1={forward_out} out2={reverse_out} ' \
              'tossbrokenreads=t ow=t{options}'\
            .format(forward_in=forward_in,
                    reverse_in=reverse_in,
                    forward_out=forward_out,
                    reverse_out=reverse_out,
                    options=extra_args)
    # An existing forward output file means the command is not re-run.
    if os.path.isfile(forward_out):
        out, err = str(), str()
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
예제 #10
0
def kmc(forward_in,
        database_name,
        min_occurrences=1,
        reverse_in='NA',
        k=31,
        cleanup=True,
        returncmd=False,
        tmpdir='tmp',
        **kwargs):
    """
    Builds and runs a kmc command. For paired input, both read files are listed in a filelist.txt written
    inside tmpdir and handed to kmc as @filelist.
    :param forward_in: Forward input reads. Assumed to be fastq.
    :param database_name: Name for output kmc database.
    :param min_occurrences: Value passed to kmc's -ci option.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only forward_in is passed.
    :param k: Value passed to kmc's -k option. Default 31.
    :param cleanup: If true, tmpdir is removed after the command has run.
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :param tmpdir: Working directory given to kmc; created if missing. Default tmp.
    :param kwargs: Other kmc arguments in parameter='argument' format.
    :return: out and err from running kmc, plus cmd if returncmd is set.
    """
    # kmc needs its working directory to exist up front.
    os.makedirs(tmpdir, exist_ok=True)
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
    # After the auto-detection above, 'NA' means no reverse file is available.
    if reverse_in == 'NA':
        cmd = 'kmc -k{} -ci{} {} {} {} {}'.format(k, min_occurrences, extra_args,
                                                  forward_in, database_name,
                                                  tmpdir)
    else:
        filelist = os.path.join(tmpdir, 'filelist.txt')
        with open(filelist, 'w') as handle:
            for reads in (forward_in, reverse_in):
                handle.write(reads + '\n')
        cmd = 'kmc -k{} -ci{} {} @{} {} {}'.format(k, min_occurrences, extra_args,
                                                   filelist, database_name,
                                                   tmpdir)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if cleanup:
        shutil.rmtree(tmpdir)
    if not returncmd:
        return out, err
    return out, err, cmd
예제 #11
0
def validate_reads(forward_in, returncmd=False, reverse_in='NA'):
    """
    Builds and runs a reformat.sh command on the input reads; paired input is run with the vpair flag.
    :param forward_in: Forward input reads.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only forward_in is checked.
    :return: out and err from running reformat, plus cmd if returncmd is set.
    """
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
    if reverse_in == 'NA':
        cmd = 'reformat.sh in={}'.format(forward_in)
    else:
        # Also covers an explicitly supplied reverse_in, which previously left cmd undefined.
        cmd = 'reformat.sh in1={} in2={} vpair'.format(forward_in, reverse_in)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
예제 #12
0
def union(database_1, database_2, results, returncmd=False):
    """
    Runs 'kmc_tools union' on two databases: kmers present in either database (or both) end up in results.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Name of the result database.
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :return: out and err from kmc_tools, plus cmd if returncmd is set.
    """
    cmd = 'kmc_tools union {} {} {}'.format(database_1, database_2, results)
    stdout, stderr = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return stdout, stderr
    return stdout, stderr, cmd
예제 #13
0
def tadpole(forward_in,
            forward_out,
            reverse_in='NA',
            returncmd=False,
            reverse_out='NA',
            mode='correct',
            **kwargs):
    """
    Builds a tadpole.sh command and runs it unless forward_out already exists.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only the forward reads are processed.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; required when reverse_in is given explicitly.
    :param mode: Value passed as mode=. Default is 'correct'; other modes ('contig', 'extend') can be given.
    :param kwargs: Other arguments to give to tadpole in parameter='argument' format. See tadpole documentation for full list.
    :return: out and err from running tadpole (empty strings if skipped), plus cmd if returncmd is set.
    :raises ValueError: If the reverse output reads are needed but cannot be determined.
    """
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        single_end = False
    elif reverse_in == 'NA':
        single_end = True
    else:
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
        single_end = False
    if single_end:
        cmd = 'tadpole.sh in={} out={} mode={} {}'.format(
            forward_in, forward_out, mode, extra_args)
    else:
        cmd = 'tadpole.sh in1={} in2={} out1={} out2={} mode={} {}'.format(
            forward_in, reverse_in, forward_out, reverse_out, mode, extra_args)
    # An existing forward output file means the command is not re-run.
    if os.path.isfile(forward_out):
        out, err = str(), str()
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
예제 #14
0
def dedupe(input_file, output_file, returncmd=False, **kwargs):
    """
    Builds and runs a dedupe.sh command (bbtools).
    :param input_file: Input file, passed as in=.
    :param output_file: Output file, passed as out=.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Arguments to give to dedupe in parameter=argument format. See dedupe documentation for full list.
    :return: out and err from running dedupe, plus cmd if returncmd is set.
    """
    extra_args = kwargs_to_string(kwargs)
    cmd = 'dedupe.sh in={} out={}{}'.format(input_file, output_file, extra_args)
    stdout, stderr = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return stdout, stderr
    return stdout, stderr, cmd
예제 #15
0
def intersect(database_1, database_2, results, returncmd=False):
    """
    Runs 'kmc_tools intersect' on two databases: kmers present in both end up in results.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Name of the result database.
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :return: out and err from kmc_tools, plus cmd if returncmd is set.
    """
    cmd = 'kmc_tools intersect {} {} {}'.format(database_1, database_2,
                                                results)
    stdout, stderr = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return stdout, stderr
    return stdout, stderr, cmd
예제 #16
0
def randomreads(reference,
                length,
                reads,
                out_fastq,
                paired=False,
                returncmd=False,
                **kwargs):
    """
    Wrapper for randomreads.sh. The command is not run if out_fastq already exists.
    :param reference: Reference file, passed as ref=.
    :param length: Length of reads to simulate
    :param reads: Number of reads to simulate
    :param out_fastq: Output file. Should end in .fastq or .fastq.gz. Must contain _R1 when paired is set,
    because the reverse output name is derived from it by replacing _R1 with _R2.
    :param paired: Create paired FASTQ files rather than single
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Other arguments to give to randomreads in parameter=argument format. See documentation for full list.
    :return: out and err (and cmd if specified) from running randomreads; empty strings if skipped.
    :raises ValueError: If paired is set and out_fastq does not contain _R1.
    """
    # Without _R1 in the name, the derived reverse output would be the same path as the forward output.
    if paired and '_R1' not in out_fastq:
        raise ValueError(
            'If paired reads are requested, out_fastq must contain _R1.\n\n'
        )
    options = kwargs_to_string(kwargs)
    if paired:
        out_fastq2 = out_fastq.replace('_R1', '_R2')
        # Create the call to randomreads - use paired=t
        cmd = 'randomreads.sh ref={ref} out={out} out2={out2} length={length} reads={reads} paired=t{options}'\
            .format(ref=reference,
                    out=out_fastq,
                    out2=out_fastq2,
                    length=length,
                    reads=reads,
                    options=options)
    else:
        cmd = 'randomreads.sh ref={ref} out={out} length={length} reads={reads}{options}'\
            .format(ref=reference,
                    out=out_fastq,
                    length=length,
                    reads=reads,
                    options=options)
    # An existing output file means the simulation is not re-run.
    if os.path.isfile(out_fastq):
        out, err = str(), str()
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
예제 #17
0
def bbduk_filter(reference,
                 forward_in,
                 forward_out,
                 returncmd=False,
                 reverse_in='NA',
                 reverse_out='NA',
                 **kwargs):
    """
    Builds and runs a bbduk.sh command that filters reads against a reference.
    :param reference: Reference file, passed as ref=. Should be in fasta format.
    :param forward_in: Forward input reads.
    :param forward_out: Output forward reads.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only the forward reads are processed.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; required when reverse_in is given explicitly.
    :param kwargs: Other arguments to give to bbduk in parameter=argument format. See bbduk documentation for full list.
    :return: out and err from running bbduk, plus cmd if returncmd is set.
    :raises ValueError: If the reverse output reads are needed but cannot be determined.
    """
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        single_end = False
    elif reverse_in == 'NA':
        single_end = True
    else:
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
        single_end = False
    if single_end:
        cmd = 'bbduk.sh in={} out={} ref={}{}'.format(forward_in, forward_out,
                                                      reference, extra_args)
    else:
        cmd = 'bbduk.sh in={} in2={} out={} out2={} ref={}{}'.format(
            forward_in, reverse_in, forward_out, reverse_out, reference,
            extra_args)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
예제 #18
0
def bbnorm(forward_in,
           forward_out,
           returncmd=False,
           reverse_in='NA',
           reverse_out='NA',
           **kwargs):
    """
    Builds a bbnorm.sh command and runs it unless forward_out already exists. No target depth is set here,
    so bbnorm's own default applies unless one is passed through kwargs.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only the forward reads are processed.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; required when reverse_in is given explicitly.
    :param kwargs: Other arguments to give to bbnorm in parameter='argument' format. See bbnorm documentation for full list.
    :return: out and err from running bbnorm (empty strings if skipped), plus cmd if returncmd is set.
    :raises ValueError: If the reverse output reads are needed but cannot be determined.
    """
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        # NOTE: this branch spells the forward output as out=, the explicit branch below as out1=.
        template = 'bbnorm.sh in1={} in2={} out={} out2={} {}'
        fields = (forward_in, reverse_in, forward_out, reverse_out, extra_args)
    elif reverse_in == 'NA':
        template = 'bbnorm.sh in={} out={} {}'
        fields = (forward_in, forward_out, extra_args)
    else:
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
        template = 'bbnorm.sh in1={} in2={} out1={} out2={} {}'
        fields = (forward_in, reverse_in, forward_out, reverse_out, extra_args)
    cmd = template.format(*fields)
    # An existing forward output file means the command is not re-run.
    if os.path.isfile(forward_out):
        out, err = str(), str()
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
예제 #19
0
def subtract(database_1,
             database_2,
             results,
             exclude_below=1,
             returncmd=False):
    """
    Runs 'kmc_tools kmers_subtract' to subtract database 2 from database 1. The result can then be dumped
    to view which kmers are present only in database 1.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Name of the result database.
    :param exclude_below: Value passed as -ci for database 2: don't subtract kmers that have fewer than this
    many occurrences in database 2.
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :return: out and err from kmc_tools, plus cmd if returncmd is set.
    """
    min_count = str(exclude_below)
    cmd = 'kmc_tools kmers_subtract {} {} -ci{} {}'.format(
        database_1, database_2, min_count, results)
    stdout, stderr = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return stdout, stderr
    return stdout, stderr, cmd
예제 #20
0
def subsample_reads(forward_in,
                    forward_out,
                    num_bases,
                    returncmd=False,
                    reverse_in='NA',
                    reverse_out='NA',
                    **kwargs):
    """
    Builds a reformat.sh command with samplebasestarget set and runs it unless forward_out already exists.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads.
    :param num_bases: Value passed as samplebasestarget=.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; otherwise only the forward reads are processed.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; required when reverse_in is given explicitly.
    :param kwargs: Other arguments to give to reformat in parameter='argument' format.
    :return: out and err from running reformat (empty strings if skipped), plus cmd if returncmd is set.
    :raises ValueError: If the reverse output reads are needed but cannot be determined.
    """
    extra_args = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        single_end = False
    elif reverse_in == 'NA':
        single_end = True
    else:
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
        single_end = False
    if single_end:
        cmd = 'reformat.sh in={} out={} samplebasestarget={} {}'.format(
            forward_in, forward_out, str(num_bases), extra_args)
    else:
        cmd = 'reformat.sh in1={} in2={} out1={} out2={} samplebasestarget={} {}'.format(
            forward_in, reverse_in, forward_out, reverse_out, str(num_bases),
            extra_args)
    # An existing forward output file means the command is not re-run.
    if os.path.isfile(forward_out):
        out, err = str(), str()
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return out, err
    return out, err, cmd
예제 #21
0
def dump(database,
         output,
         min_occurences=1,
         max_occurences=250,
         returncmd=False):
    """
    Runs 'kmc_tools dump' to write the contents of a kmc database to a text file.
    :param database: Database generated by kmc.
    :param output: Name for output.
    :param min_occurences: Value passed as -ci: minimum number of times kmer must be in database to be dumped.
    :param max_occurences: Value passed as -cx: maximum number of times a kmer can be seen and still be dumped.
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :return: out and err from kmc_tools, plus cmd if returncmd is set.
    """
    template = 'kmc_tools dump {database} -ci{min} -cx{max} {output}'
    cmd = template.format(database=database,
                          min=min_occurences,
                          max=max_occurences,
                          output=output)
    stdout, stderr = accessoryfunctions.run_subprocess(cmd)
    if not returncmd:
        return stdout, stderr
    return stdout, stderr, cmd
예제 #22
0
def prokka(input_fasta, output_dir, output_name, **kwargs):
    """
    Builds and runs a prokka command.
    :param input_fasta: Input file, given as the last argument of the command.
    :param output_dir: Value passed to prokka's --outdir option.
    :param output_name: Value passed to prokka's --prefix option.
    :param kwargs: Other arguments, converted to a string by kwargs_to_string and placed before the input file.
    :return: out and err from running prokka.
    """
    extra_args = kwargs_to_string(kwargs)
    cmd = 'prokka --outdir {} --prefix {} {} {}'.format(
        output_dir, output_name, extra_args, input_fasta)
    stdout, stderr = accessoryfunctions.run_subprocess(cmd)
    return stdout, stderr
예제 #23
0
def bbduk_trim(forward_in,
               forward_out,
               reverse_in='NA',
               reverse_out='NA',
               trimq=20,
               k=25,
               minlength=50,
               forcetrimleft=15,
               hdist=1,
               returncmd=False,
               **kwargs):
    """
    Wrapper for using bbduk to quality trim reads. Contains arguments used in OLC Assembly Pipeline, but these can
    be overwritten by using keyword parameters.
    :param forward_in: Forward reads you want to quality trim.
    :param forward_out: Output forward reads.
    :param reverse_in: Reverse input reads. If left as 'NA', the file named like forward_in with _R1 replaced
    by _R2 is used when it exists; if none is found (or None is given) only the forward reads are trimmed.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; required when reverse_in is given explicitly.
    :param trimq: Value passed as trimq=. Default 20.
    :param k: Value passed as k=. Default 25.
    :param minlength: Value passed as minlength=. Default 50.
    :param forcetrimleft: Value passed as forcetrimleft=. Default 15.
    :param hdist: Value passed as hdist=. Default 1.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Other arguments to give to bbduk in parameter=argument format. See bbduk documentation for full list.
    :return: out and err from running bbduk, plus cmd if returncmd is set.
    :raises FileNotFoundError: If bbduk.sh cannot be found on the $PATH.
    :raises ValueError: If the reverse output reads are needed but cannot be determined.
    """
    # Local import so this block does not depend on the module's import list.
    import shutil
    # Look the executable up in-process instead of shelling out to 'which'.
    if shutil.which('bbduk.sh') is None:
        raise FileNotFoundError(
            'ERROR: Could not find bbduk. Please check that the bbtools package is installed and on your $PATH.\n\n'
        )
    options = kwargs_to_string(kwargs)
    mate_candidate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate_candidate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate_candidate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        single_end = False
    elif reverse_in == 'NA' or reverse_in is None:
        single_end = True
    else:
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
        single_end = False
    if single_end:
        cmd = 'bbduk.sh in={f_in} out={f_out} qtrim=w trimq={trimq} k={k} minlength={minlength} ' \
              'forcetrimleft={forcetrimleft} ref=adapters overwrite hdist={hdist} tpe tbo{optn}'\
            .format(f_in=forward_in,
                    f_out=forward_out,
                    trimq=trimq,
                    k=k,
                    minlength=minlength,
                    forcetrimleft=forcetrimleft,
                    hdist=hdist,
                    optn=options)
    else:
        # Auto-detected and explicitly supplied reverse reads share the same paired command.
        cmd = 'bbduk.sh in1={f_in} in2={r_in} out1={f_out} out2={r_out} qtrim=w trimq={trimq} k={k} ' \
              'minlength={minlength} forcetrimleft={forcetrimleft} ref=adapters overwrite hdist={hdist} tpe tbo{optn}'\
            .format(f_in=forward_in,
                    r_in=reverse_in,
                    f_out=forward_out,
                    r_out=reverse_out,
                    trimq=trimq,
                    k=k,
                    minlength=minlength,
                    forcetrimleft=forcetrimleft,
                    hdist=hdist,
                    optn=options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err