예제 #1
0
def bbmap(reference,
          forward_in,
          out_bam,
          reverse_in='NA',
          returncmd=False,
          **kwargs):
    """
    Build a bbmap.sh command line and run it. bbmap.sh is expected to be on your $PATH.
    :param reference: Reference fasta, passed to bbmap as ref=. The nodisk flag is always added to the command.
    :param forward_in: Forward (or single-end) input reads, passed as in=.
    :param out_bam: Output alignment file, passed as out=.
    :param reverse_in: Reverse reads, passed as in2=. If left as 'NA' and forward_in contains _R1, the file obtained
     by swapping _R1 for _R2 is used when it exists on disk; otherwise the run is single-end.
    :param returncmd: If true, the command string is returned as a third value.
    :param kwargs: Extra bbmap arguments; turned into a string by kwargs_to_string and appended to the command.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        # No reverse file was given, but one exists following the _R1/_R2 convention.
        reverse_in = mate
    if reverse_in == 'NA':
        cmd = 'bbmap.sh ref={} in={} out={} nodisk{}'.format(
            reference, forward_in, out_bam, options)
    else:
        cmd = 'bbmap.sh ref={} in={} in2={} out={} nodisk{}'.format(
            reference, forward_in, reverse_in, out_bam, options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #2
0
def seal(reference,
         forward_in,
         output_file,
         reverse_in='NA',
         returncmd=False,
         **kwargs):
    """
    Build a seal.sh command line (bbtools package) and run it.
    :param reference: Reference file, passed to seal as ref=. The nodisk flag is always added to the command.
    :param forward_in: Forward (or single-end) reads, passed as in=.
    :param output_file: File passed to seal as rpkm=.
    :param reverse_in: Reverse reads, passed as in2=. If left as 'NA' and forward_in contains _R1, the file obtained
     by swapping _R1 for _R2 is used when it exists on disk; otherwise the run is single-end.
    :param returncmd: If true, the command string is returned as a third value.
    :param kwargs: Extra seal arguments; turned into a string by kwargs_to_string and appended to the command.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        # Pick up the reverse reads through the _R1/_R2 naming convention.
        reverse_in = mate
    if reverse_in == 'NA':
        cmd = 'seal.sh ref={} in={} rpkm={} nodisk{}'.format(
            reference, forward_in, output_file, options)
    else:
        cmd = 'seal.sh ref={} in={} in2={} rpkm={} nodisk{}'.format(
            reference, forward_in, reverse_in, output_file, options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #3
0
def bbmerge(forward_in,
            merged_reads,
            returncmd=False,
            reverse_in='NA',
            **kwargs):
    """
    Build a bbmerge.sh command line and run it unless the merged output already exists.
    :param forward_in: Forward input reads, passed as in=.
    :param merged_reads: Output file for merged reads, passed as out=. If this file already exists the command is
     not executed and empty strings are returned for out and err.
    :param returncmd: If true, the command string is returned as a third value.
    :param reverse_in: Reverse reads, passed as in2=. If left as 'NA' and forward_in contains _R1, the file obtained
     by swapping _R1 for _R2 is used when it exists on disk.
    :param kwargs: Extra bbmerge arguments; turned into a string by kwargs_to_string and appended to the command.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
    if reverse_in == 'NA':
        cmd = 'bbmerge.sh in={} out={} {}'.format(forward_in, merged_reads,
                                                  options)
    else:
        cmd = 'bbmerge.sh in={} in2={} out={} {}'.format(
            forward_in, reverse_in, merged_reads, options)
    # Existing output is treated as "already done": nothing is run.
    out, err = '', ''
    if not os.path.isfile(merged_reads):
        out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #4
0
def kmercountexact(forward_in, reverse_in='NA', **kwargs):
    """
    Wrapper for kmercountexact.sh.
    :param forward_in: Forward input reads, passed as in=.
    :param reverse_in: Reverse input reads, passed as in2=. If left as 'NA' and forward_in contains R1, the file
    obtained by replacing R1 with R2 is used when it exists on disk. Note that every occurrence of 'R1' in the
    path is replaced, not only a trailing read tag.
    :param kwargs: Arguments to give to kmercountexact in parameter='argument' format; turned into a string by
    kwargs_to_string and appended to the command.
    :return: out, err, cmd: the two values returned by accessoryfunctions.run_subprocess and the command string.
    If the call raises subprocess.CalledProcessError, out is an empty string and err is the error's text.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('R1', 'R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and 'R1' in forward_in:
        reverse_in = mate
    if reverse_in == 'NA':
        cmd = 'kmercountexact.sh in={} {}'.format(forward_in, options)
    else:
        cmd = 'kmercountexact.sh in={} in2={} {}'.format(
            forward_in, reverse_in, options)
    try:
        out, err = accessoryfunctions.run_subprocess(cmd)
    except subprocess.CalledProcessError as e:
        out = str()
        # Return the error as text so err is always a string, as documented,
        # rather than an exception object callers cannot write or concatenate.
        err = str(e)
    return out, err, cmd
예제 #5
0
def kmercountexact(forward_in, reverse_in='NA', returncmd=False, **kwargs):
    """
    Build a kmercountexact.sh command line and run it.
    :param forward_in: Forward input reads, passed as in=.
    :param reverse_in: Reverse input reads, passed as in2=. If left as 'NA' and forward_in contains _R1, the file
    obtained by swapping _R1 for _R2 is used when it exists on disk.
    :param returncmd: If true, the command string is returned as a third value.
    :param kwargs: Arguments to give to kmercountexact in parameter='argument' format; turned into a string by
    kwargs_to_string and appended to the command.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
    if reverse_in == 'NA':
        cmd = 'kmercountexact.sh in={} {}'.format(forward_in, options)
    else:
        cmd = 'kmercountexact.sh in={} in2={} {}'.format(
            forward_in, reverse_in, options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #6
0
def repair_reads(forward_in,
                 forward_out,
                 returncmd=False,
                 reverse_in='NA',
                 reverse_out='NA'):
    """
    Build a repair.sh command line for a read pair and run it unless forward_out already exists.
    :param forward_in: Forward input reads, passed as in1=.
    :param forward_out: Forward output reads, passed as out1=. If this file already exists the command is not
     executed and empty strings are returned for out and err.
    :param returncmd: If true, the command string is returned as a third value.
    :param reverse_in: Reverse input reads, passed as in2=. If left as 'NA' and forward_in contains _R1, the file
     obtained by swapping _R1 for _R2 is used when it exists on disk.
    :param reverse_out: Reverse output reads, passed as out2=. When the reverse input was auto-detected and this is
     left as 'NA', it is derived from forward_out by swapping _R1 for _R2.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    :raises ValueError: if reverse_out cannot be derived (forward_out lacks _R1) or was not given when required.
    """
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
    elif reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')
    cmd = 'repair.sh in1={} in2={} out1={} out2={} tossbrokenreads=t repair=t overwrite=t'\
        .format(forward_in, reverse_in, forward_out, reverse_out)
    # Existing output is treated as "already done": nothing is run.
    out, err = '', ''
    if not os.path.isfile(forward_out):
        out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #7
0
파일: kmc.py 프로젝트: lowandrew/BioTools
def intersect(database_1, database_2, results):
    """
    Run ``kmc_tools intersect`` on two kmc databases.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Name of the result database given to kmc_tools.
    :return: out, err as returned by accessoryfunctions.run_subprocess.
    """
    template = 'kmc_tools intersect {} {} {}'
    command = template.format(database_1, database_2, results)
    stdout, stderr = accessoryfunctions.run_subprocess(command)
    return stdout, stderr
예제 #8
0
def validate_reads(forward_in, returncmd=False, reverse_in='NA'):
    """
    Run reformat.sh over a read set; paired input is run with the vpair flag.
    :param forward_in: Forward (or single-end) input reads.
    :param returncmd: If true, the command string is returned as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
     swapping _R1 for _R2 is used when it exists on disk. An explicitly supplied reverse file is used as given
     (previously this case left the command undefined and raised UnboundLocalError).
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
    if reverse_in == 'NA':
        cmd = 'reformat.sh in={}'.format(forward_in)
    else:
        # Covers both auto-detected and explicitly supplied reverse reads.
        cmd = 'reformat.sh in1={} in2={} vpair'.format(forward_in, reverse_in)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    else:
        return out, err
예제 #9
0
def bbduk_trim(forward_in,
               forward_out,
               reverse_in='NA',
               reverse_out='NA',
               **kwargs):
    """
    Wrapper for using bbduk to quality trim reads. The command hard-codes qtrim=w trimq=20 k=25 minlength=50
    forcetrimleft=15 hdist=1 tpe tbo overwrite and uses resources/adapters.fa next to the bbduk.sh executable
    as the adapter reference; keyword parameters are appended after these.
    :param forward_in: Forward reads you want to quality trim.
    :param forward_out: Output forward reads.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains R1, the file obtained by
     replacing R1 with R2 is used when it exists on disk. Note that every occurrence of 'R1' in the path is replaced.
    :param reverse_out: Reverse output reads. Derived from forward_out (R1 -> R2) when the reverse input was
     auto-detected; required when reverse_in is given explicitly.
    :param kwargs: Other arguments to give to bbduk in parameter=argument format; turned into a string by
     kwargs_to_string and appended to the command.
    :return: out, err as returned by accessoryfunctions.run_subprocess.
    :raises FileNotFoundError: if bbduk.sh cannot be located on $PATH.
    :raises ValueError: if reverse_out is needed but can neither be derived nor was supplied.
    """
    # Imported locally so this function does not depend on the module's import block.
    import shutil

    options = kwargs_to_string(kwargs)
    # shutil.which replaces spawning the external `which` program, which is not available on every platform.
    bbduk_path = shutil.which('bbduk.sh')
    if bbduk_path is None:
        raise FileNotFoundError(
            'ERROR: Could not find bbduk. Please check that the bbtools package is installed and on your $PATH.'
        )
    bbduk_dir = os.path.dirname(bbduk_path)

    paired_template = 'bbduk.sh in1={} in2={} out1={} out2={} qtrim=w trimq=20 k=25 minlength=50 forcetrimleft=15' \
                      ' ref={}/resources/adapters.fa overwrite hdist=1 tpe tbo{}'
    single_template = 'bbduk.sh in={} out={} qtrim=w trimq=20 k=25 minlength=50 forcetrimleft=15' \
                      ' ref={}/resources/adapters.fa overwrite hdist=1 tpe tbo{}'

    mate = forward_in.replace('R1', 'R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and 'R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if 'R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain R1.\n\n'
                )
            reverse_out = forward_out.replace('R1', 'R2')
    elif reverse_in != 'NA' and reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')

    if reverse_in == 'NA':
        cmd = single_template.format(forward_in, forward_out, bbduk_dir, options)
    else:
        cmd = paired_template.format(forward_in, reverse_in, forward_out, reverse_out,
                                     bbduk_dir, options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return out, err
예제 #10
0
def dedupe(input_file, output_file, **kwargs):
    """
    Build a dedupe.sh command line (bbtools package) and run it.
    :param input_file: Input file, passed as in=.
    :param output_file: Output file, passed as out=.
    :param kwargs: Extra dedupe arguments in parameter=argument format; turned into a string by kwargs_to_string
     and appended directly after the out= argument.
    :return: out, err as returned by accessoryfunctions.run_subprocess.
    """
    extra = kwargs_to_string(kwargs)
    template = 'dedupe.sh in={} out={}{}'
    command = template.format(input_file, output_file, extra)
    stdout, stderr = accessoryfunctions.run_subprocess(command)
    return stdout, stderr
예제 #11
0
def dump(mer_file, output_file='counts.fasta', options=''):
    """
    Build a ``jellyfish dump`` command line and run it.
    :param mer_file: Output from jellyfish count.
    :param output_file: File given to jellyfish via -o. Default counts.fasta
    :param options: Other options to pass to jellyfish, as a single string typed the way it would be on the
    command line. Appended to the end of the command.
    :return: out, err as returned by accessoryfunctions.run_subprocess.
    """
    template = 'jellyfish dump {} -o {} {}'
    command = template.format(mer_file, output_file, options)
    stdout, stderr = accessoryfunctions.run_subprocess(command)
    return stdout, stderr
예제 #12
0
def count(forward_in, reverse_in='NA', kmer_size=31, count_file='mer_counts.jf', hash_size='100M', options='',
          returncmd=False):
    """
    Runs jellyfish count to kmerize reads to a desired kmer size.
    :param forward_in: Forward input reads or fasta file. Can be uncompressed or gzip compressed; any input ending
    in .gz (single-end included) is uncompressed first via accessoryfunctions.uncompress_gzip.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    swapping _R1 for _R2 is used when it exists on disk.
    :param kmer_size: Kmer size to get jellyfish to use (-m). Default 31.
    :param count_file: File to have jellyfish output mer counts to (-o). Default mer_counts.jf
    :param hash_size: Hash size (-s). Default 100M.
    :param options: Other options to pass to jellyfish. Input should be a string, with options typed as they would be
    on the command line.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and mate != forward_in:
        reverse_in = mate
    read_files = [forward_in] if reverse_in == 'NA' else [forward_in, reverse_in]
    to_remove = list()
    try:
        for index, reads in enumerate(read_files):
            if reads.endswith('.gz'):
                # uncompress_gzip is used here as returning the path of the uncompressed copy,
                # which is what gets handed to jellyfish and deleted afterwards.
                read_files[index] = accessoryfunctions.uncompress_gzip(reads)
                to_remove.append(read_files[index])
        if len(read_files) == 2:
            cmd = 'jellyfish count -m {} -C -s {} -o {} {} -F 2 {} {}'.format(str(kmer_size), hash_size, count_file,
                                                                              options, read_files[0], read_files[1])
        else:
            cmd = 'jellyfish count -m {} -C -s {} -o {} {} {}'.format(str(kmer_size), hash_size, count_file,
                                                                      options, read_files[0])
        out, err = accessoryfunctions.run_subprocess(cmd)
    finally:
        # Always delete the temporary uncompressed copies, even if decompression or jellyfish failed.
        for item in to_remove:
            if os.path.isfile(item):
                os.remove(item)
    if returncmd:
        return out, err, cmd
    else:
        return out, err
예제 #13
0
파일: kmc.py 프로젝트: lowandrew/BioTools
def dump(database, output, min_occurences=1, max_occurences=250):
    """
    Build a ``kmc_tools dump`` command line and run it.
    :param database: Database generated by kmc.
    :param output: Name for output.
    :param min_occurences: Value passed to kmc_tools as -ci (minimum count for a kmer to be dumped).
    :param max_occurences: Value passed to kmc_tools as -cx (maximum count for a kmer to be dumped).
    :return: out, err as returned by accessoryfunctions.run_subprocess.
    """
    template = 'kmc_tools dump -ci{} -cx{} {} {}'
    command = template.format(min_occurences, max_occurences, database, output)
    stdout, stderr = accessoryfunctions.run_subprocess(command)
    return stdout, stderr
예제 #14
0
파일: kmc.py 프로젝트: lowandrew/BioTools
def kmc(forward_in,
        database_name,
        min_occurrences=1,
        reverse_in='NA',
        k=31,
        cleanup=True,
        tmpdir='tmp',
        **kwargs):
    """
    Runs kmc to count kmers.
    :param forward_in: Forward input reads.
    :param database_name: Name for output kmc database.
    :param min_occurrences: Value passed to kmc as -ci (minimum count for a kmer to be included).
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains R1, the file obtained by
     replacing R1 with R2 is used when it exists on disk. Note that every occurrence of 'R1' in the path is replaced.
    :param k: Kmer size. Default 31.
    :param cleanup: If true, deletes tmpdir (and everything in it) afterwards, also when the run fails.
    :param tmpdir: Temporary directory to store intermediary kmc files. Default tmp. Created if missing.
    :param kwargs: Other kmc arguments in parameter='argument' format; turned into a string by kwargs_to_string.
    :return: out, err as returned by accessoryfunctions.run_subprocess.
    """
    # Create the tmpdir kmc needs if it isn't already present (no separate existence check, so no race).
    os.makedirs(tmpdir, exist_ok=True)
    try:
        options = kwargs_to_string(kwargs)
        mate = forward_in.replace('R1', 'R2')
        if os.path.isfile(mate) and reverse_in == 'NA' and 'R1' in forward_in:
            reverse_in = mate
        if reverse_in == 'NA':
            kmc_input = forward_in
        else:
            # Paired input is handed to kmc as @<file listing both read files>.
            filelist = os.path.join(tmpdir, 'filelist.txt')
            with open(filelist, 'w') as f:
                f.write(forward_in + '\n')
                f.write(reverse_in + '\n')
            kmc_input = '@' + filelist
        cmd = 'kmc -k{} -ci{} {} {} {} {}'.format(k, min_occurrences, options,
                                                  kmc_input, database_name,
                                                  tmpdir)
        out, err = accessoryfunctions.run_subprocess(cmd)
    finally:
        # Remove the scratch directory even when something above raised.
        if cleanup:
            shutil.rmtree(tmpdir)
    return out, err
예제 #15
0
파일: kmc.py 프로젝트: carden24/OLCTools
def union(database_1, database_2, results, returncmd=False):
    """
    Run ``kmc_tools union`` on two kmc databases.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Name of the result database given to kmc_tools.
    :param returncmd: If true, the command string is returned as a third value.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    template = 'kmc_tools union {} {} {}'
    cmd = template.format(database_1, database_2, results)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #16
0
파일: kmc.py 프로젝트: carden24/OLCTools
def intersect(database_1, database_2, results, returncmd=False):
    """
    Run ``kmc_tools intersect`` on two kmc databases.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Name of the result database given to kmc_tools.
    :param returncmd: If true, the command string is returned as a third value.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    template = 'kmc_tools intersect {} {} {}'
    cmd = template.format(database_1, database_2, results)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #17
0
def tadpole(forward_in,
            forward_out,
            reverse_in='NA',
            returncmd=False,
            reverse_out='NA',
            mode='correct',
            **kwargs):
    """
    Build a tadpole.sh command line and run it unless forward_out already exists.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads. If this file already exists the command is not executed and empty
     strings are returned for out and err.
    :param reverse_in: Reverse reads. If left as 'NA' and forward_in contains _R1, the file obtained by swapping
     _R1 for _R2 is used when it exists on disk.
    :param returncmd: If true, the command string is returned as a third value.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
     auto-detected; required when reverse_in is given explicitly.
    :param mode: Value passed to tadpole as mode=. Default is 'correct'.
    :param kwargs: Extra tadpole arguments; turned into a string by kwargs_to_string and appended to the command.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    :raises ValueError: if reverse_out is needed but can neither be derived nor was supplied.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
    elif reverse_in != 'NA' and reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')

    if reverse_in == 'NA':
        cmd = 'tadpole.sh in={} out={} mode={} {}'.format(
            forward_in, forward_out, mode, options)
    else:
        cmd = 'tadpole.sh in1={} in2={} out1={} out2={} mode={} {}'.format(
            forward_in, reverse_in, forward_out, reverse_out, mode, options)

    # Existing output is treated as "already done": nothing is run.
    out, err = '', ''
    if not os.path.isfile(forward_out):
        out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #18
0
def dump(mer_file, output_file='counts.fasta', options='', returncmd=False):
    """
    Build a ``jellyfish dump`` command line and run it.
    :param mer_file: Output from jellyfish count.
    :param output_file: File given to jellyfish via -o. Default counts.fasta
    :param options: Other options to pass to jellyfish, as a single string typed the way it would be on the
    command line. Appended to the end of the command.
    :param returncmd: If true, the command string is returned as a third value.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    template = 'jellyfish dump {} -o {} {}'
    cmd = template.format(mer_file, output_file, options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #19
0
def dedupe(input_file, output_file, returncmd=False, **kwargs):
    """
    Build a dedupe.sh command line (bbtools package) and run it.
    :param input_file: Input file, passed as in=.
    :param output_file: Output file, passed as out=.
    :param returncmd: If true, the command string is returned as a third value.
    :param kwargs: Extra dedupe arguments in parameter=argument format; turned into a string by kwargs_to_string
     and appended directly after the out= argument.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    extra = kwargs_to_string(kwargs)
    template = 'dedupe.sh in={} out={}{}'
    cmd = template.format(input_file, output_file, extra)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #20
0
def screen(*args, output_file='screen.tab', threads=1, **kwargs):
    """
    Build a ``mash screen`` command line and run it, redirecting its output to a file.
    :param args: Files to screen, placed on the command line in the order given. The original documentation
    states the first must be a sketch and that mash v2.0 or higher is required.
    :param output_file: File the command's standard output is redirected to (via ``>``).
    :param threads: Value passed to mash as -p.
    :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter=''
    :return: out, err as returned by accessoryfunctions.run_subprocess.
    """
    options = kwargs_to_string(kwargs)
    # Each input is followed by a single space, exactly as the command expects.
    inputs = ''.join(arg + ' ' for arg in args)
    tail = ' -p {} {} > {}'.format(str(threads), options, output_file)
    cmd = 'mash screen ' + inputs + tail
    out, err = accessoryfunctions.run_subprocess(cmd)
    return out, err
예제 #21
0
def dist(*args, output_file='distances.tab', threads=1, **kwargs):
    """
    Build a ``mash dist`` command line and run it, redirecting its output to a file.
    :param args: Files you want to find distances between, placed on the command line in the order given.
    :param output_file: File the command's standard output is redirected to (via ``>``). Default distances.tab
    :param threads: Value passed to mash as -p.
    :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter=''
    :return: out, err as returned by accessoryfunctions.run_subprocess.
    """
    options = kwargs_to_string(kwargs)
    # Each input is followed by a single space, exactly as the command expects.
    inputs = ''.join(arg + ' ' for arg in args)
    tail = ' -p {} {} > {}'.format(str(threads), options, output_file)
    cmd = 'mash dist ' + inputs + tail
    out, err = accessoryfunctions.run_subprocess(cmd)
    return out, err
예제 #22
0
def bbduk_filter(reference,
                 forward_in,
                 forward_out,
                 returncmd=False,
                 reverse_in='NA',
                 reverse_out='NA',
                 **kwargs):
    """
    Build a bbduk.sh command line that filters reads against a reference, and run it.
    :param reference: Reference file, passed to bbduk as ref=.
    :param forward_in: Forward input reads, passed as in=.
    :param forward_out: Forward output reads, passed as out=.
    :param returncmd: If true, the command string is returned as a third value.
    :param reverse_in: Reverse input reads, passed as in2=. If left as 'NA' and forward_in contains _R1, the file
     obtained by swapping _R1 for _R2 is used when it exists on disk.
    :param reverse_out: Reverse output reads, passed as out2=. Derived from forward_out (_R1 -> _R2) when the
     reverse input was auto-detected; required when reverse_in is given explicitly.
    :param kwargs: Extra bbduk arguments; turned into a string by kwargs_to_string and appended to the command.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    :raises ValueError: if reverse_out is needed but can neither be derived nor was supplied.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
    elif reverse_in != 'NA' and reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')

    if reverse_in == 'NA':
        cmd = 'bbduk.sh in={} out={} ref={}{}'.format(forward_in, forward_out,
                                                      reference, options)
    else:
        cmd = 'bbduk.sh in={} in2={} out={} out2={} ref={}{}'.format(
            forward_in, reverse_in, forward_out, reverse_out, reference,
            options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #23
0
파일: kmc.py 프로젝트: carden24/OLCTools
def subtract(database_1, database_2, results, exclude_below=1, returncmd=False):
    """
    Run ``kmc_tools kmers_subtract`` to subtract database 2 from database 1.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Name of the result database given to kmc_tools.
    :param exclude_below: Value placed after database_2 on the command line as -ci. Per the original documentation,
    kmers with fewer than this many occurrences in database 2 are not subtracted from database 1.
    :param returncmd: If true, the command string is returned as a third value.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    """
    template = 'kmc_tools kmers_subtract {} {} -ci{} {}'
    cmd = template.format(database_1, database_2, str(exclude_below), results)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #24
0
def bbnorm(forward_in,
           forward_out,
           returncmd=False,
           reverse_in='NA',
           reverse_out='NA',
           **kwargs):
    """
    Build a bbnorm.sh command line and run it unless forward_out already exists.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads. If this file already exists the command is not executed and empty
     strings are returned for out and err.
    :param returncmd: If true, the command string is returned as a third value.
    :param reverse_in: Reverse reads. If left as 'NA' and forward_in contains _R1, the file obtained by swapping
     _R1 for _R2 is used when it exists on disk.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
     auto-detected; required when reverse_in is given explicitly.
    :param kwargs: Extra bbnorm arguments; turned into a string by kwargs_to_string and appended to the command.
    :return: out, err as returned by accessoryfunctions.run_subprocess (plus cmd when returncmd is true).
    :raises ValueError: if reverse_out is needed but can neither be derived nor was supplied.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    paired_template = None
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        # The auto-detected form spells the forward output as out=.
        paired_template = 'bbnorm.sh in1={} in2={} out={} out2={} {}'
    elif reverse_in != 'NA':
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
        # The explicit form spells the forward output as out1=.
        paired_template = 'bbnorm.sh in1={} in2={} out1={} out2={} {}'

    if paired_template is None:
        cmd = 'bbnorm.sh in={} out={} {}'.format(forward_in, forward_out,
                                                 options)
    else:
        cmd = paired_template.format(
            forward_in, reverse_in, forward_out, reverse_out, options)

    # Existing output is treated as "already done": nothing is run.
    out, err = '', ''
    if not os.path.isfile(forward_out):
        out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
예제 #25
0
def sketch(*args, output_sketch='sketch.msh', threads=1, **kwargs):
    """
    Build and run a ``mash sketch`` command.
    :param args: Input files to sketch. Each one is placed on the command line as given, so file patterns
     (i.e. *fasta) can be used. At least one is required.
    :param output_sketch: File name handed to mash via -o. Default sketch.msh.
    :param threads: Thread count handed to mash via -p.
    :param kwargs: Extra mash arguments in parameter='argument' format (parameter='' for a bare switch). They are
     converted to a string by kwargs_to_string and appended to the command.
    :return: The two values returned by accessoryfunctions.run_subprocess (stdout and stderr of mash sketch).
    :raises ValueError: If no input files are supplied.
    """
    extra_args = kwargs_to_string(kwargs)
    if not args:
        raise ValueError(
            'At least one file to sketch must be specified. You specified 0 files.'
        )
    # Each input is followed by one space so the -o flag lines up directly after the last file.
    inputs = ''.join(path + ' ' for path in args)
    cmd = 'mash sketch ' + inputs
    cmd += '-o {} -p {} {}'.format(output_sketch, str(threads), extra_args)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return out, err
예제 #26
0
def subsample_reads(forward_in,
                    forward_out,
                    num_bases,
                    returncmd=False,
                    reverse_in='NA',
                    reverse_out='NA',
                    **kwargs):
    """
    Build and run a reformat.sh command that subsamples reads, passing num_bases as samplebasestarget.
    The command is skipped entirely when forward_out already exists on disk.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads.
    :param num_bases: Value given to reformat.sh as samplebasestarget.
    :param returncmd: If set to true, the command string is returned as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the matching _R2 file is
     used when it exists; otherwise the run is single-ended.
    :param reverse_out: Reverse output reads. If left as 'NA' with auto-detected reverse reads, it is derived from
     forward_out by replacing _R1 with _R2.
    :param kwargs: Other arguments to give to reformat.sh in parameter='argument' format.
    :return: out and err as returned by accessoryfunctions.run_subprocess, or two empty strings if the command
     was skipped. The command string is appended when returncmd is set.
    :raises ValueError: If reverse_out cannot be derived (forward_out lacks _R1) or is missing while reverse_in
     was given explicitly.
    """
    extra_args = kwargs_to_string(kwargs)
    paired_template = 'reformat.sh in1={} in2={} out1={} out2={} samplebasestarget={} {}'
    guessed_reverse = forward_in.replace('_R1', '_R2')

    if os.path.isfile(guessed_reverse) and reverse_in == 'NA' and '_R1' in forward_in:
        # Reverse reads found by naming convention.
        reverse_in = guessed_reverse
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        cmd = paired_template.format(forward_in, reverse_in, forward_out,
                                     reverse_out, str(num_bases), extra_args)
    elif reverse_in == 'NA':
        # No reverse reads available: single-ended run.
        cmd = 'reformat.sh in={} out={} samplebasestarget={} {}'.format(
            forward_in, forward_out, str(num_bases), extra_args)
    else:
        # Reverse reads were given explicitly, so their output must be too.
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
        cmd = paired_template.format(forward_in, reverse_in, forward_out,
                                     reverse_out, str(num_bases), extra_args)

    if os.path.isfile(forward_out):
        # Output already present: do not rerun.
        out, err = '', ''
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)

    if returncmd:
        return out, err, cmd
    return out, err
예제 #27
0
def dist(*args, output_file='distances.tab', threads=1, returncmd=False, **kwargs):
    """
    Wrapper for mash dist.
    :param args: Files you want to find distances between. Any number can be passed in; each is placed on the
     command line in the order given.
    :param output_file: Output file to write your distances to (the command's stdout is redirected into it).
     Default distances.tab
    :param threads: Number of threads to run mash on.
    :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter=''
    :param returncmd: If true, will return the command used to call mash as well as out and err.
    :return: stdout and stderr from mash dist, plus the command string when returncmd is true.
    :raises ValueError: If no input files are given.
    """
    # Validate before doing any other work so a bad call fails immediately.
    if not args:
        raise ValueError(
            'At least one file to find distances between must be specified. You specified 0 files.')
    options = kwargs_to_string(kwargs)
    # Each input is followed by a single space; the resulting command string matches the historical format.
    cmd = 'mash dist ' + ''.join(arg + ' ' for arg in args)
    cmd += ' -p {} {} > {}'.format(str(threads), options, output_file)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
예제 #28
0
파일: prokka.py 프로젝트: carden24/OLCTools
def prokka(input_fasta, output_dir, output_name, **kwargs):
    """
    Build and run a prokka command.
    :param input_fasta: Input file, placed last on the prokka command line.
    :param output_dir: Value passed to prokka as --outdir.
    :param output_name: Value passed to prokka as --prefix.
    :param kwargs: Other arguments to give to prokka; converted to a string by kwargs_to_string and inserted
     before the input file.
    :return: The two values returned by accessoryfunctions.run_subprocess (presumably stdout and stderr).
    """
    extra_args = kwargs_to_string(kwargs)
    template = 'prokka --outdir {} --prefix {} {} {}'
    command = template.format(output_dir, output_name, extra_args, input_fasta)
    stdout, stderr = accessoryfunctions.run_subprocess(command)
    return stdout, stderr