def repair_reads(forward_in, forward_out, returncmd=False, reverse_in='NA', reverse_out='NA'):
    """
    Wrapper for repair.sh from the bbtools package. The command is always built for a pair of read files.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads. If this file already exists, repair.sh is not run.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk.
    :param reverse_out: Reverse output reads. If left as 'NA' and the reverse input was found automatically,
    it is derived from forward_out by replacing _R1 with _R2.
    :return: out and err: values returned by accessoryfunctions.run_subprocess, or two empty strings if the
    command was skipped because forward_out already exists.
    :raises ValueError: If reverse_out cannot be determined, or if no reverse input reads are available.
    """
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        # Reverse reads were not supplied, but the _R2 counterpart of forward_in exists.
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
    else:
        if reverse_out == 'NA':
            raise ValueError('Reverse output reads must be specified.')
        if reverse_in == 'NA' or reverse_in is None:
            # Without this check the command would be built with the placeholder as the in2 file name.
            raise ValueError(
                'Reverse input reads could not be found automatically and must be specified.'
            )
    cmd = 'repair.sh in1={} in2={} out1={} out2={} tossbrokenreads=t repair=t overwrite=t'.format(
        forward_in, reverse_in, forward_out, reverse_out)
    if os.path.isfile(forward_out):
        # Output is already present: skip the run and report empty stdout/stderr.
        out, err = '', ''
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
def kmercountexact(forward_in, reverse_in='NA', returncmd=False, **kwargs):
    """
    Builds and runs a kmercountexact.sh command for single or paired reads.
    :param forward_in: Forward input reads.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Arguments to give to kmercountexact in parameter='argument' format.
    See kmercountexact documentation for full list.
    :return: out and err: stdout string and stderr string from running kmercountexact.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        # Reverse reads were not supplied, but the _R2 counterpart exists.
        reverse_in = mate
    if reverse_in == 'NA':
        # Single-end input.
        cmd = 'kmercountexact.sh in={fwd} {opts}'.format(fwd=forward_in, opts=options)
    else:
        cmd = 'kmercountexact.sh in={fwd} in2={rev} {opts}'.format(
            fwd=forward_in, rev=reverse_in, opts=options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def seal(reference, forward_in, output_file, reverse_in='NA', returncmd=False, **kwargs):
    """
    Builds and runs a seal.sh command (bbtools package). The command always carries the nodisk flag.
    :param reference: Reference file, in fasta format.
    :param forward_in: Forward reads, fastq format.
    :param output_file: Output file to put rpkm statistics into (passed as rpkm=).
    :param reverse_in: Reverse reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Arguments to give to seal in parameter=argument format. See seal documentation for full list.
    :return: out and err: stdout string and stderr string from running seal.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
    if reverse_in == 'NA':
        # No reverse reads supplied or detected: single-end command.
        template = 'seal.sh ref={ref} in={fwd} rpkm={stats} nodisk{opts}'
    else:
        template = 'seal.sh ref={ref} in={fwd} in2={rev} rpkm={stats} nodisk{opts}'
    # Fields a template does not mention are simply ignored by str.format.
    cmd = template.format(ref=reference, fwd=forward_in, rev=reverse_in, stats=output_file, opts=options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
def screen(*args, output_file='screen.tab', threads=1, returncmd=False, **kwargs):
    """
    Wrapper for mash screen. Requires mash v2.0 or higher. The mash output is piped through
    'sort -gr' before being written to output_file.
    :param args: Files you want to screen. First argument must be a sketch.
    :param output_file: Output to write containment info to.
    :param threads: Number of threads to run mash on.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter=''
    :return: stdout and stderr from mash screen
    :raises ValueError: If no files are given.
    """
    # Fail early, as dist and sketch do, instead of running mash with no input files.
    if not args:
        raise ValueError(
            'At least one file to screen must be specified. You specified 0 files.'
        )
    options = kwargs_to_string(kwargs)
    # Every file is followed by a single space, matching the command layout used by the other mash wrappers.
    cmd = 'mash screen ' + ''.join(arg + ' ' for arg in args)
    cmd += ' -p {} {} | sort -gr > {}'.format(str(threads), options, output_file)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
def bbmerge(forward_in, merged_reads, returncmd=False, reverse_in='NA', **kwargs):
    """
    Builds a bbmerge.sh command and runs it unless the merged output already exists.
    :param forward_in: Forward input reads. If reverse_in is left as 'NA' and forward_in contains _R1, the file
    obtained by replacing _R1 with _R2 is used as reverse reads when it exists on disk.
    :param merged_reads: Output file to write merged reads to. If it already exists, bbmerge is not run.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input file, if you don't want it autodetected.
    :param kwargs: Other arguments to give to bbmerge in parameter='argument' format.
    See bbmerge documentation for full list.
    :return: out and err: stdout string and stderr string from running bbmerge (empty strings if skipped).
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
    if reverse_in == 'NA':
        cmd = 'bbmerge.sh in={fwd} out={merged} {opts}'.format(
            fwd=forward_in, merged=merged_reads, opts=options)
    else:
        cmd = 'bbmerge.sh in={fwd} in2={rev} out={merged} {opts}'.format(
            fwd=forward_in, rev=reverse_in, merged=merged_reads, opts=options)
    if os.path.isfile(merged_reads):
        # Output already present: nothing is executed.
        out, err = '', ''
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def bbmap(reference, forward_in, out_bam, reverse_in='NA', returncmd=False, **kwargs):
    """
    Wrapper for bbmap. Assumes that bbmap executable is in your $PATH.
    The command built here always includes the nodisk flag.
    :param reference: Reference fasta.
    :param forward_in: Input reads. Should be in fastq format.
    :param out_bam: Output file. Should end in .sam or .bam
    :param reverse_in: Reverse reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk. Specify explicitly to override.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Other arguments to give to bbmap in parameter=argument format.
    See bbmap documentation for full list.
    :return: out and err: stdout string and stderr string from running bbmap.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
    # Assemble the command piece by piece; in2 is only present for paired input.
    cmd = 'bbmap.sh ref={} in={}'.format(reference, forward_in)
    if reverse_in != 'NA':
        cmd += ' in2={}'.format(reverse_in)
    cmd += ' out={} nodisk{}'.format(out_bam, options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
def dist(*args, output_file='distances.tab', threads=1, returncmd=False, **kwargs):
    """
    Wrapper for mash dist.
    :param args: Files you want to find distances between. They are passed to mash dist in the order given.
    :param output_file: Output file to write your distances to. Default distances.tab
    :param threads: Number of threads to run mash on.
    :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter=''
    :param returncmd: If true, will return the command used to call mash as well as out and err.
    :return: stdout and stderr from mash dist
    :raises ValueError: If no files are given.
    """
    # Validate before doing any other work; the message now refers to dist rather than sketch.
    if not args:
        raise ValueError(
            'At least one file to find distances for must be specified. You specified 0 files.'
        )
    options = kwargs_to_string(kwargs)
    cmd = 'mash dist ' + ''.join(arg + ' ' for arg in args)
    cmd += ' -p {} {} > {}'.format(str(threads), options, output_file)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
def sketch(*args, output_sketch='sketch.msh', threads=1, returncmd=False, **kwargs):
    """
    Builds and runs a mash sketch command.
    :param args: Files you want to sketch. Any number can be passed in, file patterns (i.e. *fasta) can be used.
    :param output_sketch: Output file for your sketch. Default sketch.msh.
    :param threads: Number of threads to run analysis on.
    :param kwargs: Other arguments, in parameter='argument' format. If parameter is just a switch, do parameter=''
    :param returncmd: If true, will return the command used to call mash as well as out and err.
    :return: stdout and stderr from mash sketch
    :raises ValueError: If no files are given.
    """
    options = kwargs_to_string(kwargs)
    if not args:
        raise ValueError(
            'At least one file to sketch must be specified. You specified 0 files.'
        )
    # Each input file is followed by one space, then the output/thread/extra options.
    inputs = ''.join(name + ' ' for name in args)
    cmd = 'mash sketch {files}-o {sketch} -p {threads} {opts}'.format(
        files=inputs, sketch=output_sketch, threads=str(threads), opts=options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def reformat_reads(forward_in, forward_out, returncmd=False, reverse_in='NA', reverse_out='NA', **kwargs):
    """
    Builds a reformat.sh command (with tossbrokenreads=t ow=t) and runs it unless forward_out already exists.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads. If this file already exists, reformat.sh is not run.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk. 'NA' or None otherwise means single-end input.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; must be given when reverse_in is given explicitly.
    :param kwargs: Other arguments to give to reformat.sh in parameter='argument' format.
    :return: out and err from accessoryfunctions.run_subprocess, or two empty strings if the run was skipped.
    :raises ValueError: If reverse output reads are needed but cannot be determined.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        paired = True
    elif reverse_in == 'NA' or reverse_in is None:
        paired = False
    elif reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')
    else:
        paired = True

    if paired:
        cmd = ('reformat.sh in1={forward_in} in2={reverse_in} out1={forward_out} out2={reverse_out} '
               'tossbrokenreads=t ow=t{options}').format(
            forward_in=forward_in, reverse_in=reverse_in, forward_out=forward_out,
            reverse_out=reverse_out, options=options)
    else:
        cmd = 'reformat.sh in={forward_in} out={forward_out} tossbrokenreads=t ow=t{options}'.format(
            forward_in=forward_in, forward_out=forward_out, options=options)

    if os.path.isfile(forward_out):
        # Output already present: nothing is executed.
        out, err = '', ''
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def kmc(forward_in, database_name, min_occurrences=1, reverse_in='NA', k=31, cleanup=True, returncmd=False, tmpdir='tmp', **kwargs):
    """
    Runs kmc to count kmers.
    :param forward_in: Forward input reads. Assumed to be fastq.
    :param database_name: Name for output kmc database.
    :param min_occurrences: Minimum number of times kmer must be seen to be included in database (passed as -ci).
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk.
    :param k: Kmer size. Default 31.
    :param cleanup: If true, deletes tmpdir that kmc needs, even when running kmc raised an exception.
    :param tmpdir: Temporary directory to store intermediary kmc files. Default tmp.
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :param kwargs: Other kmc arguments in parameter='argument' format.
    :return: Stdout and stderr from kmc.
    """
    # Create the tmpdir kmc needs if it isn't already present (no separate existence check, so no race).
    os.makedirs(tmpdir, exist_ok=True)
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
    try:
        if reverse_in == 'NA':
            # Single-end: the read file is given to kmc directly.
            reads = forward_in
        else:
            # Paired: both files are listed in a file that is handed to kmc with the @ prefix.
            filelist = os.path.join(tmpdir, 'filelist.txt')
            with open(filelist, 'w') as f:
                f.write(forward_in + '\n')
                f.write(reverse_in + '\n')
            reads = '@' + filelist
        cmd = 'kmc -k{} -ci{} {} {} {} {}'.format(k, min_occurrences, options, reads, database_name, tmpdir)
        out, err = accessoryfunctions.run_subprocess(cmd)
    finally:
        # Remove the temporary directory on failure as well as on success.
        if cleanup:
            shutil.rmtree(tmpdir)
    if returncmd:
        return out, err, cmd
    return out, err
def validate_reads(forward_in, returncmd=False, reverse_in='NA'):
    """
    Runs reformat.sh on the input reads without an output file. For paired input the vpair flag is added.
    :param forward_in: Forward input reads.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk.
    :return: out and err: the values returned by accessoryfunctions.run_subprocess for the reformat.sh call.
    """
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
    if reverse_in == 'NA':
        cmd = 'reformat.sh in={}'.format(forward_in)
    else:
        # Also covers explicitly supplied reverse reads, for which no command used to be built at all.
        cmd = 'reformat.sh in1={} in2={} vpair'.format(forward_in, reverse_in)
    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
def union(database_1, database_2, results, returncmd=False):
    """
    Runs 'kmc_tools union' on two kmc databases: kmers present in either database (or both) are kept.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Result database, containing kmers in either database 1 or 2 (or both).
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :return: Stdout and stderr from kmc.
    """
    cmd = 'kmc_tools union {first} {second} {result}'.format(
        first=database_1, second=database_2, result=results)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def tadpole(forward_in, forward_out, reverse_in='NA', returncmd=False, reverse_out='NA', mode='correct', **kwargs):
    """
    Builds a tadpole.sh command and runs it unless forward_out already exists.
    Default is to run in correction mode, but other modes ('contig', 'extend') can also be specified.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads. If this file already exists, tadpole is not run.
    :param reverse_in: Reverse reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; must be given when reverse_in is given explicitly.
    :param mode: Mode to run tadpole in. Default is 'correct'.
    :param kwargs: Other arguments to give to tadpole in parameter='argument' format.
    See tadpole documentation for full list.
    :return: out and err: stdout string and stderr string from running tadpole (empty strings if skipped).
    :raises ValueError: If reverse output reads are needed but cannot be determined.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        paired = True
    elif reverse_in == 'NA':
        paired = False
    elif reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')
    else:
        paired = True

    if paired:
        cmd = 'tadpole.sh in1={fwd} in2={rev} out1={fwd_out} out2={rev_out} mode={mode} {opts}'.format(
            fwd=forward_in, rev=reverse_in, fwd_out=forward_out, rev_out=reverse_out,
            mode=mode, opts=options)
    else:
        cmd = 'tadpole.sh in={fwd} out={fwd_out} mode={mode} {opts}'.format(
            fwd=forward_in, fwd_out=forward_out, mode=mode, opts=options)

    if os.path.isfile(forward_out):
        # Output already present: nothing is executed.
        out, err = '', ''
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def dedupe(input_file, output_file, returncmd=False, **kwargs):
    """
    Builds and runs a dedupe.sh command (bbtools package).
    :param input_file: Input file.
    :param output_file: Output file.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Arguments to give to dedupe in parameter=argument format.
    See dedupe documentation for full list.
    :return: out and err: stdout string and stderr string from running dedupe.
    """
    extra = kwargs_to_string(kwargs)
    # The extra options are appended directly after the output file, with no separator added here.
    cmd = 'dedupe.sh in={src} out={dst}{opts}'.format(src=input_file, dst=output_file, opts=extra)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def intersect(database_1, database_2, results, returncmd=False):
    """
    Runs 'kmc_tools intersect' on two kmc databases: only kmers present in both are kept.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Result database, containing kmers in both database 1 and 2.
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :return: Stdout and stderr from kmc.
    """
    cmd = 'kmc_tools intersect {first} {second} {result}'.format(
        first=database_1, second=database_2, result=results)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def randomreads(reference, length, reads, out_fastq, paired=False, returncmd=False, **kwargs):
    """
    Wrapper for randomreads.sh. Assumes that the randomreads.sh executable is in your $PATH.
    The command is not run if out_fastq already exists.
    :param reference: Reference fasta.
    :param length: Length of reads to simulate
    :param reads: Number of reads to simulate
    :param out_fastq: Output file. Should end in .fastq or .fastq.gz. Must contain _R1 if paired is set.
    :param paired: Create paired FASTQ files rather than single. The reverse file name is out_fastq with
    _R1 replaced by _R2.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Other arguments to give to randomreads in parameter=argument format.
    See documentation for full list.
    :return: out and err (and cmd if specified): stdout string and stderr string from running randomreads
    (empty strings if the run was skipped).
    :raises ValueError: If paired is set and out_fastq does not contain _R1.
    """
    # Without _R1 in the name the replacement below is a no-op and both outputs would be the same file.
    if paired and '_R1' not in out_fastq:
        raise ValueError(
            'If paired is set, out_fastq must contain _R1 so that the reverse output can be named.'
        )
    options = kwargs_to_string(kwargs)
    if paired:
        out_fastq2 = out_fastq.replace('_R1', '_R2')
        # Create the call to randomreads - use paired=t
        cmd = 'randomreads.sh ref={ref} out={out} out2={out2} length={length} reads={reads} paired=t{options}'\
            .format(ref=reference, out=out_fastq, out2=out_fastq2, length=length, reads=reads, options=options)
    else:
        cmd = 'randomreads.sh ref={ref} out={out} length={length} reads={reads}{options}'\
            .format(ref=reference, out=out_fastq, length=length, reads=reads, options=options)
    if os.path.isfile(out_fastq):
        # Output already present: nothing is executed.
        out, err = '', ''
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err
def bbduk_filter(reference, forward_in, forward_out, returncmd=False, reverse_in='NA', reverse_out='NA', **kwargs):
    """
    Builds and runs a bbduk.sh command that processes the input reads against a reference (ref=).
    :param reference: Reference to compare reads against. Should be in fasta format.
    :param forward_in: Forward input reads.
    :param forward_out: Output forward reads.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; must be given when reverse_in is given explicitly.
    :param kwargs: Other arguments to give to bbduk in parameter=argument format.
    See bbduk documentation for full list.
    :return: out and err: stdout string and stderr string from running bbduk.
    :raises ValueError: If reverse output reads are needed but cannot be determined.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        paired = True
    elif reverse_in == 'NA':
        paired = False
    elif reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')
    else:
        paired = True

    if paired:
        cmd = 'bbduk.sh in={fwd} in2={rev} out={fwd_out} out2={rev_out} ref={ref}{opts}'.format(
            fwd=forward_in, rev=reverse_in, fwd_out=forward_out, rev_out=reverse_out,
            ref=reference, opts=options)
    else:
        cmd = 'bbduk.sh in={fwd} out={fwd_out} ref={ref}{opts}'.format(
            fwd=forward_in, fwd_out=forward_out, ref=reference, opts=options)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def bbnorm(forward_in, forward_out, returncmd=False, reverse_in='NA', reverse_out='NA', **kwargs):
    """
    Builds a bbnorm.sh command and runs it unless forward_out already exists.
    No target depth is set here, so bbnorm's own default applies unless one is passed through kwargs.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads. If this file already exists, bbnorm is not run.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; must be given when reverse_in is given explicitly.
    :param kwargs: Other arguments to give to bbnorm in parameter='argument' format.
    See bbnorm documentation for full list.
    :return: out and err: stdout string and stderr string from running bbnorm (empty strings if skipped).
    :raises ValueError: If reverse output reads are needed but cannot be determined.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        # NOTE: this branch spells the forward output 'out=' while the explicit-reverse branch below
        # uses 'out1='; both spellings are kept exactly as they are.
        template = 'bbnorm.sh in1={fwd} in2={rev} out={fwd_out} out2={rev_out} {opts}'
    elif reverse_in == 'NA':
        template = 'bbnorm.sh in={fwd} out={fwd_out} {opts}'
    elif reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')
    else:
        template = 'bbnorm.sh in1={fwd} in2={rev} out1={fwd_out} out2={rev_out} {opts}'

    # Fields a template does not mention are simply ignored by str.format.
    cmd = template.format(fwd=forward_in, rev=reverse_in, fwd_out=forward_out,
                          rev_out=reverse_out, opts=options)
    if os.path.isfile(forward_out):
        # Output already present: nothing is executed.
        out, err = '', ''
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def subtract(database_1, database_2, results, exclude_below=1, returncmd=False):
    """
    Runs 'kmc_tools kmers_subtract' to subtract database 2 from database 1.
    Results can then be dumped to view what kmers are present only in database 1.
    :param database_1: First database generated by kmc.
    :param database_2: Second database generated by kmc.
    :param results: Result database, containing kmers in database 1 but not in database 2.
    :param exclude_below: Don't subtract kmers from database 1 that have less than this many occurrences
    in database 2. Passed as -ci directly after database_2.
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :return: Stdout and stderr from kmc.
    """
    cmd = 'kmc_tools kmers_subtract {first} {second} -ci{cutoff} {result}'.format(
        first=database_1, second=database_2, cutoff=str(exclude_below), result=results)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def subsample_reads(forward_in, forward_out, num_bases, returncmd=False, reverse_in='NA', reverse_out='NA', **kwargs):
    """
    Builds a reformat.sh command with samplebasestarget set and runs it unless forward_out already exists.
    :param forward_in: Forward input reads.
    :param forward_out: Forward output reads. If this file already exists, reformat.sh is not run.
    :param num_bases: Value passed to reformat.sh as samplebasestarget.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; must be given when reverse_in is given explicitly.
    :param kwargs: Other arguments to give to reformat.sh in parameter='argument' format.
    :return: out and err from accessoryfunctions.run_subprocess, or two empty strings if the run was skipped.
    :raises ValueError: If reverse output reads are needed but cannot be determined.
    """
    options = kwargs_to_string(kwargs)
    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        paired = True
    elif reverse_in == 'NA':
        paired = False
    elif reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')
    else:
        paired = True

    if paired:
        cmd = 'reformat.sh in1={fwd} in2={rev} out1={fwd_out} out2={rev_out} samplebasestarget={bases} {opts}'.format(
            fwd=forward_in, rev=reverse_in, fwd_out=forward_out, rev_out=reverse_out,
            bases=str(num_bases), opts=options)
    else:
        cmd = 'reformat.sh in={fwd} out={fwd_out} samplebasestarget={bases} {opts}'.format(
            fwd=forward_in, fwd_out=forward_out, bases=str(num_bases), opts=options)

    if os.path.isfile(forward_out):
        # Output already present: nothing is executed.
        out, err = '', ''
    else:
        out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def dump(database, output, min_occurences=1, max_occurences=250, returncmd=False):
    """
    Runs 'kmc_tools dump' to write the contents of a kmc database to a text file.
    :param database: Database generated by kmc.
    :param output: Name for output.
    :param min_occurences: Minimum number of times kmer must be in database to be dumped (passed as -ci).
    :param max_occurences: Maximum number of times a kmer can be seen and still be dumped (passed as -cx).
    :param returncmd: If true, will return the command used to call KMC as well as out and err.
    :return: Stdout and stderr from kmc.
    """
    pieces = [
        'kmc_tools dump',
        '{}'.format(database),
        '-ci{}'.format(min_occurences),
        '-cx{}'.format(max_occurences),
        '{}'.format(output),
    ]
    cmd = ' '.join(pieces)
    out, err = accessoryfunctions.run_subprocess(cmd)
    return (out, err, cmd) if returncmd else (out, err)
def prokka(input_fasta, output_dir, output_name, **kwargs):
    """
    Builds and runs a prokka command.
    :param input_fasta: Input fasta file, placed last on the command line.
    :param output_dir: Directory passed to prokka as --outdir.
    :param output_name: Name passed to prokka as --prefix.
    :param kwargs: Other arguments, converted to a command-line string by kwargs_to_string.
    :return: out and err: the values returned by accessoryfunctions.run_subprocess for the prokka call.
    """
    extra = kwargs_to_string(kwargs)
    cmd = 'prokka --outdir {outdir} --prefix {prefix} {opts} {fasta}'.format(
        outdir=output_dir, prefix=output_name, opts=extra, fasta=input_fasta)
    return accessoryfunctions.run_subprocess(cmd)
def bbduk_trim(forward_in, forward_out, reverse_in='NA', reverse_out='NA', trimq=20, k=25, minlength=50, forcetrimleft=15, hdist=1, returncmd=False, **kwargs):
    """
    Wrapper for using bbduk to quality trim reads. Contains arguments used in OLC Assembly Pipeline, but these
    can be overwritten by using keyword parameters.
    :param forward_in: Forward reads you want to quality trim.
    :param forward_out: Output forward reads.
    :param reverse_in: Reverse input reads. If left as 'NA' and forward_in contains _R1, the file obtained by
    replacing _R1 with _R2 is used when it exists on disk. 'NA' or None otherwise means single-end input.
    :param reverse_out: Reverse output reads. Derived from forward_out (_R1 -> _R2) when the reverse input was
    found automatically; must be given when reverse_in is given explicitly.
    :param trimq: Value passed to bbduk as trimq. Default 20.
    :param k: Value passed to bbduk as k. Default 25.
    :param minlength: Value passed to bbduk as minlength. Default 50.
    :param forcetrimleft: Value passed to bbduk as forcetrimleft. Default 15.
    :param hdist: Value passed to bbduk as hdist. Default 1.
    :param returncmd: If set to true, function will return the cmd string passed to subprocess as a third value.
    :param kwargs: Other arguments to give to bbduk in parameter=argument format.
    See bbduk documentation for full list.
    :return: out and err: stdout string and stderr string from running bbduk.
    :raises FileNotFoundError: If bbduk.sh cannot be found on $PATH.
    :raises ValueError: If reverse output reads are needed but cannot be determined.
    """
    # Check for the executable up front so a missing install gives a clear error.
    if shutil.which('bbduk.sh') is None:
        message = ('ERROR: Could not find bbduk. '
                   'Please check that the bbtools package is installed and on your $PATH.\n\n')
        print(message)
        raise FileNotFoundError(message.strip())
    options = kwargs_to_string(kwargs)

    mate = forward_in.replace('_R1', '_R2')
    if os.path.isfile(mate) and reverse_in == 'NA' and '_R1' in forward_in:
        reverse_in = mate
        if reverse_out == 'NA':
            if '_R1' not in forward_out:
                raise ValueError(
                    'If you do not specify reverse_out, forward_out must contain _R1.\n\n'
                )
            reverse_out = forward_out.replace('_R1', '_R2')
        paired = True
    elif reverse_in == 'NA' or reverse_in is None:
        paired = False
    elif reverse_out == 'NA':
        raise ValueError('Reverse output reads must be specified.')
    else:
        paired = True

    # Input/output part of the command differs between paired and single-end runs.
    if paired:
        files = 'in1={f_in} in2={r_in} out1={f_out} out2={r_out}'.format(
            f_in=forward_in, r_in=reverse_in, f_out=forward_out, r_out=reverse_out)
    else:
        files = 'in={f_in} out={f_out}'.format(f_in=forward_in, f_out=forward_out)
    # Trimming settings are shared by both forms of the command.
    settings = ('qtrim=w trimq={trimq} k={k} minlength={minlength} forcetrimleft={forcetrimleft} '
                'ref=adapters overwrite hdist={hdist} tpe tbo{optn}').format(
        trimq=trimq, k=k, minlength=minlength, forcetrimleft=forcetrimleft, hdist=hdist, optn=options)
    cmd = 'bbduk.sh {} {}'.format(files, settings)

    out, err = accessoryfunctions.run_subprocess(cmd)
    if returncmd:
        return out, err, cmd
    return out, err