def run_blastp(infile, outfile, database, cpus, evalue=1e-10, shell=False):
    """
    Query your cluster sequences against an AA database (designed with MIBiG in mind) using blastp.

    :param infile: the query cluster AA sequences file (mpfa format)
    :param outfile: the resulting blastp output table file
    :param database: path to the blast database files, generated with makeblastdb command
    :param cpus: the number of cpus to use (passed to -num_threads)
    :param evalue: the evalue cutoff (default=1e-10)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: whatever run_command returns - presumably the STDERR/STDOUT; confirm against run_command
    """
    # Deal with any spaces in the file paths by wrapping them in literal double quotes.
    # NOTE(review): literal quotes only help if run_command joins the list into a shell
    # string; if the list is handed to the OS as-is they become part of the path - confirm.
    if ' ' in infile:
        infile = ''.join(['"', infile, '"'])
    if ' ' in outfile:
        outfile = ''.join(['"', outfile, '"'])
    if ' ' in database:
        database = ''.join(['"', database, '"'])
    cmd = [
        'blastp',
        '-db', str(database),
        '-query', str(infile),
        '-out', str(outfile),
        '-num_threads', str(cpus),
        '-outfmt', '6',
        '-max_hsps', '1',
        # Every argument must be a string; evalue is a float by default.
        '-evalue', str(evalue),
    ]
    return run_command(cmd, shell=shell)
def run_makeblastdb(infile, outfile, shell=False):
    """
    Build a protein BLAST database from a sequence file using makeblastdb.

    The database is created with "-dbtype prot" and "-hash_index".

    :param infile: the AA sequences file to index (passed to -in)
    :param outfile: the name/prefix of the resulting blast database (passed to -out)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: whatever run_command returns - presumably the STDERR/STDOUT; confirm against run_command
    """
    # Paths containing a space are wrapped in literal double quotes
    source_path = '"' + infile + '"' if ' ' in infile else infile
    target_path = '"' + outfile + '"' if ' ' in outfile else outfile

    makeblastdb_args = ['makeblastdb']
    makeblastdb_args += ['-in', str(source_path)]
    makeblastdb_args += ['-out', str(target_path)]
    makeblastdb_args += ['-dbtype', 'prot', '-hash_index']
    return run_command(makeblastdb_args, shell=shell)
def run_shogun(inpath, outpath, utree_db, threads, shell=False):
    """
    Run SHOGUN LCA using UTree (the shogun_utree_lca command).

    :param inpath: Directory with the FASTA files, ".fna" extension required (passed to -i)
    :param outpath: Output directory for the result files (passed to -o)
    :param utree_db: path to the UTree database, ".ctr" extension (passed to -u)
    :param threads: the number of threads to use (passed to -p)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: whatever run_command returns - presumably the STDERR/STDOUT; confirm against run_command
    """
    # Paths containing a space are wrapped in literal double quotes
    inpath, outpath, utree_db = (
        '"' + path + '"' if ' ' in path else path
        for path in (inpath, outpath, utree_db)
    )

    shogun_args = ['shogun_utree_lca']
    for flag, value in (('-i', inpath), ('-o', outpath), ('-u', utree_db), ('-p', threads)):
        shogun_args.extend((flag, str(value)))
    return run_command(shogun_args, shell=shell)
def bowtie2_align(infile, outfile, database, alignments_to_report=32, num_threads=SETTINGS.N_jobs, shell=False):
    """
    Align a FASTA file against a bowtie2 index, reporting multiple alignments per query.

    :param infile: the query FASTA file (passed to -f)
    :param outfile: the resulting SAM file (passed to -S)
    :param database: path to the bowtie2 index (passed to -x)
    :param alignments_to_report: the number of alignments to report, passed to -k (default=32)
    :param num_threads: the number of threads to use, passed to -p (default=SETTINGS.N_jobs)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: whatever run_command returns - presumably the STDERR/STDOUT; confirm against run_command
    """
    # Fixed scoring options; the values carry literal double quotes exactly as written here
    scoring_options = [
        '--np', '0',
        '--mp', '"1,1"',
        '--rdg', '"0,1"',
        '--rfg', '"0,1"',
        '--score-min', '"L,0,-0.02"',
    ]

    bowtie2_args = ['bowtie2', '--no-unal', '-x', database, '-S', outfile]
    bowtie2_args.extend(scoring_options)
    bowtie2_args.extend(['--norc', '-f', infile, '--very-sensitive'])
    bowtie2_args.extend(['-k', str(alignments_to_report), '-p', str(num_threads), '--no-hd'])
    return run_command(bowtie2_args, shell=shell)
def utree_compress(input_uncompressed_tree, output_compressed_tree, shell=False):
    """
    Run utree-compress on an uncompressed tree file.

    Usage of the underlying tool: utree-compress preTree.ubt compTree.ctr

    :param input_uncompressed_tree: first positional argument (the uncompressed tree)
    :param output_compressed_tree: second positional argument (the compressed tree to produce)
    :param shell: whether to use the shell (default=False)
    :return: whatever run_command returns
    """
    compress_args = ['utree-compress']
    compress_args.append(input_uncompressed_tree)
    compress_args.append(output_compressed_tree)
    return run_command(compress_args, shell=shell)
def utree_search_gg(input_compressed_tree, input_fasta_to_search, output, shell=False):
    """
    Run utree-search_gg with a compressed tree, a FASTA file to search and an output file.

    Usage of the underlying tool: utree-search_gg compTreeGG.ctr fastaToSearch.fa output.txt

    :param input_compressed_tree: first positional argument (the compressed tree)
    :param input_fasta_to_search: second positional argument (the FASTA file to search)
    :param output: third positional argument (the output file)
    :param shell: whether to use the shell (default=False)
    :return: whatever run_command returns
    """
    positional = (input_compressed_tree, input_fasta_to_search, output)
    search_args = ['utree-search_gg', *positional]
    return run_command(search_args, shell=shell)
def utree_build(input_fasta, input_fasta_labels, output_uncompressed_tree, threads=SETTINGS.N_jobs, shell=False):
    """
    Run utree-build to create an uncompressed tree from a FASTA file and its labels.

    Usage of the underlying tool: utree-build input_fasta.fa labels.map output.ubt [threads]

    :param input_fasta: the FASTA file to build from
    :param input_fasta_labels: the labels/map file for the FASTA entries
    :param output_uncompressed_tree: the uncompressed tree file to produce
    :param threads: the number of threads to use (default=SETTINGS.N_jobs)
    :param shell: whether to use the shell (default=False)
    :return: whatever run_command returns
    """
    cmd = [
        'utree-build',
        input_fasta,
        input_fasta_labels,
        output_uncompressed_tree,
        # Command arguments must be strings; threads is normally an integer.
        str(threads),
    ]
    return run_command(cmd, shell=shell)
def bowtie2_build(infile, outfile, offrate=3, shell=False):
    """
    Build a bowtie2 index from a FASTA file with bowtie2-build.

    :param infile: the FASTA file to build the index with
    :param outfile: the prefix for the bowtie2 index
    :param offrate: offrate for the index, passed to -o (default=3)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: whatever run_command returns - presumably the STDERR/STDOUT; confirm against run_command
    """
    build_options = ['-f', '-o', str(offrate)]
    build_args = ['bowtie2-build'] + build_options + [infile, outfile]
    return run_command(build_args, shell=shell)
def embalmer_align(queries, references, outfile, mode='CAPITALIST2', num_threads=SETTINGS.N_jobs, shell=False):
    """
    Run the embalmer aligner on a set of queries against a set of references.

    The call always adds "--shear", "--latency 16" and "--id 0.985".

    :param queries: the query file (passed to --queries)
    :param references: the reference file (passed to --references)
    :param outfile: the output file (passed to --output)
    :param mode: the value for --mode (default='CAPITALIST2')
    :param num_threads: the number of threads to use, passed to --threads (default=SETTINGS.N_jobs)
    :param shell: whether to use the shell (default=False)
    :return: whatever run_command returns
    """
    cmd = [
        'embalmer',
        '--queries', queries,
        '--references', references,
        '--output', outfile,
        '--threads', str(num_threads),
        '--mode', mode,
        '--shear',
        # Command arguments must be strings, so the numeric constants are
        # spelled as their str() equivalents ('16' and '0.985').
        '--latency', '16',
        '--id', '0.985',
    ]
    return run_command(cmd, shell=shell)
def dustmasker(infile, outfile, infmt='fasta', outfmt='fasta', shell=False):
    """
    Run dustmasker on a sequence file.

    :param infile: the input file (passed to -in)
    :param outfile: the output file (passed to -out)
    :param infmt: the input format, passed to -infmt (default='fasta')
    :param outfmt: the output format, passed to -outfmt (default='fasta')
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: whatever run_command returns - presumably the STDERR/STDOUT; confirm against run_command
    """
    flag_values = (
        ('-in', infile),
        ('-out', outfile),
        ('-infmt', infmt),
        ('-outfmt', outfmt),
    )
    dustmasker_args = ['dustmasker']
    for flag, value in flag_values:
        # Every value is converted to a string before being handed over
        dustmasker_args.extend((flag, str(value)))
    return run_command(dustmasker_args, shell=shell)
def run_beta_div(infile, id_level, mapping, variable, metric, outdir, shell=False):
    """
    Calculate beta diversity for the matched OFUs and compare between variables of interest
    from a mapping file, by invoking the ofu_beta_div.R script.

    :param infile: the OFU table at a given cluster ID
    :param id_level: the clustering height level (0-100)
    :param mapping: the mapping file in standard format (#SampleID is col 1 header)
    :param variable: the mapping file header for the variable of interest
    :param metric: the distance method to use: manhattan, euclidean, canberra, bray, kulczynski,
        jaccard, gower, altGower, morisita, horn, mountford, raup, binomial, chao, cao,
        or mahalanobis.
    :param outdir: the output directory/prefix - given as a string that will be appended with
        the ID and .png file extension
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: whatever run_command returns - presumably the STDERR/STDOUT; confirm against run_command
    """
    # Paths containing a space are wrapped in literal double quotes
    infile, mapping, outdir = (
        '"' + path + '"' if ' ' in path else path
        for path in (infile, mapping, outdir)
    )

    # The script takes its arguments positionally, in exactly this order
    positional = (infile, id_level, mapping, variable, metric, outdir)
    script_args = ['ofu_beta_div.R'] + [str(value) for value in positional]
    return run_command(script_args, shell=shell)
def run_alpha_div(infile, id_level, mapping, variable, metric, outdir, shell=False):
    """
    Calculate alpha diversity for the matched OFUs and compare between variables of interest
    from a mapping file.

    :param infile: the OFU table at a given cluster ID
    :param id_level: the clustering height level (0-100)
    :param mapping: the mapping file in standard format (#SampleID is col 1 header)
    :param variable: the mapping file header for the variable of interest
    :param metric: the diversity metric to use: shannon, simpson, or invsimpson
    :param outdir: the output directory/prefix - given as a string that will be appended with
        the metric, ID level, and .png file extension
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: whatever run_command returns - presumably the STDERR/STDOUT; confirm against run_command
    """
    # Deal with any spaces in the file paths by wrapping them in literal double quotes
    if ' ' in infile:
        infile = ''.join(['"', infile, '"'])
    if ' ' in mapping:
        mapping = ''.join(['"', mapping, '"'])
    if ' ' in outdir:
        outdir = ''.join(['"', outdir, '"'])
    cmd = [
        # NOTE(review): this wrapper used to invoke 'ofu_beta_div.R' (copied from
        # run_beta_div), which does not match the alpha-diversity metrics documented
        # above. Assumes the companion script is named 'ofu_alpha_div.R' - confirm.
        'ofu_alpha_div.R',
        str(infile),
        str(id_level),
        str(mapping),
        str(variable),
        str(metric),
        str(outdir),
    ]
    return run_command(cmd, shell=shell)