def run_blastp(infile, outfile, database, cpus, evalue=1e-10, shell=False):
    """
    Query cluster sequences against an AA database (designed with MIBiG in mind) using blastp.

    :param infile: the query cluster AA sequences file (mpfa format)
    :param outfile: the resulting blastp output table file
    :param database: path to the blast database files, generated with makeblastdb command
    :param cpus: the number of cpus to use (passed as -num_threads)
    :param evalue: the evalue cutoff (default=1e-10)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command (described as the STDERR/STDOUT)
    """
    # Deal with any spaces in the file paths by wrapping them in double quotes.
    # NOTE(review): with shell=False the quotes may reach blastp as literal
    # characters of the path -- confirm against how run_command executes cmd.
    if ' ' in infile:
        infile = ''.join(['"', infile, '"'])
    if ' ' in outfile:
        outfile = ''.join(['"', outfile, '"'])
    if ' ' in database:
        database = ''.join(['"', database, '"'])

    cmd = [
        'blastp',
        '-db', str(database),
        '-query', str(infile),
        '-out', str(outfile),
        '-num_threads', str(cpus),
        '-outfmt', '6',
        '-max_hsps', '1',
        # Every argv entry must be a string; evalue arrives as a float by default.
        '-evalue', str(evalue),
    ]
    return run_command(cmd, shell=shell)
def run_makeblastdb(infile, outfile, shell=False):
    """
    Build a protein BLAST database (-dbtype prot, with -hash_index) using makeblastdb.

    :param infile: the AA sequences file handed to makeblastdb via -in
    :param outfile: the database name/prefix handed to makeblastdb via -out
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command (described as the STDERR/STDOUT)
    """
    def _quote_if_spaced(path):
        # A path containing a space is wrapped in literal double quotes.
        if ' ' in path:
            return '"' + path + '"'
        return path

    source = _quote_if_spaced(infile)
    target = _quote_if_spaced(outfile)

    cmd = ['makeblastdb']
    cmd += ['-in', str(source)]
    cmd += ['-out', str(target)]
    cmd += ['-dbtype', 'prot', '-hash_index']
    return run_command(cmd, shell=shell)
Example #3
0
def run_shogun(inpath, outpath, utree_db, threads, shell=False):
    """
    Run SHOGUN LCA using UTree (the shogun_utree_lca command).

    :param inpath: directory with the FASTA files, ".fna" extension required (passed as -i)
    :param outpath: output directory for the result files (passed as -o)
    :param utree_db: path to the UTree database, ".ctr" extension (passed as -u)
    :param threads: the number of threads to use (passed as -p)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command (described as the STDERR/STDOUT)
    """
    # Any path containing a space is wrapped in literal double quotes,
    # then each one is stringified for the argument list.
    prepared = []
    for path in (inpath, outpath, utree_db):
        if ' ' in path:
            path = '"' + path + '"'
        prepared.append(str(path))
    src, dest, db = prepared

    cmd = ['shogun_utree_lca', '-i', src, '-o', dest, '-u', db, '-p', str(threads)]
    return run_command(cmd, shell=shell)
Example #4
0
def bowtie2_align(infile, outfile, database, alignments_to_report=32, num_threads=SETTINGS.N_jobs, shell=False):
    """
    Align a FASTA file against a bowtie2 index, reporting multiple alignments per read.

    :param infile: the query FASTA file (passed as -f)
    :param outfile: the resulting SAM file (passed as -S)
    :param database: path to the bowtie2 index (passed as -x)
    :param alignments_to_report: the number of alignments to report, -k (default=32)
    :param num_threads: the number of threads to use, -p (default=SETTINGS.N_jobs)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command (described as the STDERR/STDOUT)
    """
    # Scoring options, passed exactly as written.
    # NOTE(review): the values carry embedded double quotes; with shell=False
    # those quotes may reach bowtie2 literally -- confirm against run_command.
    scoring = [
        '--np', '0',
        '--mp', '"1,1"',
        '--rdg', '"0,1"',
        '--rfg', '"0,1"',
        '--score-min', '"L,0,-0.02"',
    ]

    cmd = ['bowtie2', '--no-unal', '-x', database, '-S', outfile]
    cmd.extend(scoring)
    cmd.extend(['--norc', '-f', infile, '--very-sensitive'])
    cmd.extend(['-k', str(alignments_to_report)])
    cmd.extend(['-p', str(num_threads)])
    cmd.append('--no-hd')
    return run_command(cmd, shell=shell)
Example #5
0
def utree_compress(input_uncompressed_tree, output_compressed_tree, shell=False):
    """
    Run utree-compress with the input tree followed by the output tree.

    :param input_uncompressed_tree: first positional argument (presumably a .ubt file -- verify)
    :param output_compressed_tree: second positional argument (presumably a .ctr file -- verify)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command
    """
    cmd = ['utree-compress']
    cmd.append(input_uncompressed_tree)
    cmd.append(output_compressed_tree)
    return run_command(cmd, shell=shell)
Example #6
0
def utree_search_gg(input_compressed_tree, input_fasta_to_search, output, shell=False):
    """
    Run utree-search_gg with a compressed tree, a FASTA file to search, and an output path.

    :param input_compressed_tree: first positional argument (presumably a .ctr file -- verify)
    :param input_fasta_to_search: second positional argument, the FASTA file to search
    :param output: third positional argument, the output file
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command
    """
    positional = [input_compressed_tree, input_fasta_to_search, output]
    return run_command(['utree-search_gg'] + positional, shell=shell)
Example #7
0
def utree_build(input_fasta, input_fasta_labels, output_uncompressed_tree, threads=SETTINGS.N_jobs, shell=False):
    """
    Run utree-build to produce an uncompressed tree from a FASTA file and its labels.

    usage: utree-build input_fasta.fa labels.map output.ubt [threads]

    :param input_fasta: the FASTA file to build from
    :param input_fasta_labels: the labels/map file for the FASTA entries
    :param output_uncompressed_tree: path of the uncompressed tree to write
    :param threads: the number of threads to use (default=SETTINGS.N_jobs)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command
    """
    cmd = [
        'utree-build',
        input_fasta,
        input_fasta_labels,
        output_uncompressed_tree,
        threads,
    ]
    # Every argv entry must be a string; threads is typically an int.
    cmd = [str(arg) for arg in cmd]
    return run_command(cmd, shell=shell)
Example #8
0
def bowtie2_build(infile, outfile, offrate=3, shell=False):
    """
    Build a bowtie2 index from a FASTA file with bowtie2-build.

    :param infile: the FASTA file to build the index with
    :param outfile: the prefix for the bowtie2 index
    :param offrate: offrate for the index, passed as -o (default=3)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command (described as the STDERR/STDOUT)
    """
    options = ['-f', '-o', str(offrate)]
    cmd = ['bowtie2-build'] + options + [infile, outfile]
    return run_command(cmd, shell=shell)
Example #9
0
def embalmer_align(queries, references, outfile, mode='CAPITALIST2', num_threads=SETTINGS.N_jobs, shell=False):
    """
    Run the embalmer aligner on a set of queries against a set of references.

    :param queries: the queries file (passed as --queries)
    :param references: the references file (passed as --references)
    :param outfile: the output file (passed as --output)
    :param mode: the embalmer mode (default='CAPITALIST2')
    :param num_threads: the number of threads to use (default=SETTINGS.N_jobs)
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command
    """
    cmd = [
        'embalmer',
        '--queries', queries,
        '--references', references,
        '--output', outfile,
        '--threads', num_threads,
        '--mode', mode,
        '--shear',
        '--latency', 16,
        '--id', .985,
    ]
    # Every argv entry must be a string; the latency and id values are numeric
    # literals, so stringify the whole list (16 -> '16', .985 -> '0.985').
    cmd = [str(arg) for arg in cmd]
    return run_command(cmd, shell=shell)
Example #10
0
def dustmasker(infile, outfile, infmt='fasta', outfmt='fasta', shell=False):
    """
    Invoke the dustmasker command on infile, writing the result to outfile.

    :param infile: the input file (passed as -in)
    :param outfile: the output file (passed as -out)
    :param infmt: the input format, passed as -infmt (default='fasta')
    :param outfmt: the output format, passed as -outfmt (default='fasta')
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command (described as the STDERR/STDOUT)
    """
    flag_values = (
        ('-in', infile),
        ('-out', outfile),
        ('-infmt', infmt),
        ('-outfmt', outfmt),
    )
    cmd = ['dustmasker']
    for flag, value in flag_values:
        # Stringify each entry so the argument list holds only strings.
        cmd.append(str(flag))
        cmd.append(str(value))
    return run_command(cmd, shell=shell)
Example #11
0
def run_beta_div(infile,
                 id_level,
                 mapping,
                 variable,
                 metric,
                 outdir,
                 shell=False):
    """
    Calculate beta diversity for the matched OFUs and compare between variables
    of interest from a mapping file, by running the ofu_beta_div.R script.

    :param infile: the OFU table at a given cluster ID
    :param id_level: the clustering height level (0-100)
    :param mapping: the mapping file in standard format (#SampleID is col 1 header)
    :param variable: the mapping file header for the variable of interest
    :param metric: the distance method to use: manhattan, euclidean, canberra, bray,
        kulczynski, jaccard, gower, altGower, morisita, horn, mountford, raup,
        binomial, chao, cao, or mahalanobis
    :param outdir: the output directory/prefix - given as a string that will be
        appended with the ID and .png file extension
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command (described as the STDERR/STDOUT)
    """
    def _quote_if_spaced(path):
        # A path containing a space is wrapped in literal double quotes.
        return '"' + path + '"' if ' ' in path else path

    table = _quote_if_spaced(infile)
    mapfile = _quote_if_spaced(mapping)
    prefix = _quote_if_spaced(outdir)

    # Positional arguments, in the order the script receives them.
    positional = (table, id_level, mapfile, variable, metric, prefix)
    cmd = ['ofu_beta_div.R'] + [str(arg) for arg in positional]
    return run_command(cmd, shell=shell)
def run_alpha_div(infile,
                  id_level,
                  mapping,
                  variable,
                  metric,
                  outdir,
                  shell=False):
    """
    Calculate alpha diversity for the matched OFUs and compare between variables
    of interest from a mapping file.

    :param infile: the OFU table at a given cluster ID
    :param id_level: the clustering height level (0-100)
    :param mapping: the mapping file in standard format (#SampleID is col 1 header)
    :param variable: the mapping file header for the variable of interest
    :param metric: the diversity metric to use: shannon, simpson, or invsimpson
    :param outdir: the output directory/prefix - given as a string that will be
        appended with the metric, ID level, and .png file extension
    :param shell: whether to use the shell NOT RECOMMENDED (default=False)
    :return: the value returned by run_command (described as the STDERR/STDOUT)
    """
    def _quote_if_spaced(path):
        # A path containing a space is wrapped in literal double quotes.
        return '"' + path + '"' if ' ' in path else path

    table = _quote_if_spaced(infile)
    mapfile = _quote_if_spaced(mapping)
    prefix = _quote_if_spaced(outdir)

    # NOTE(review): this invokes the *beta* diversity script even though the
    # function is documented as alpha diversity -- looks like a copy/paste
    # slip; confirm the intended script name before changing it.
    script = 'ofu_beta_div.R'

    # Positional arguments, in the order the script receives them.
    positional = (table, id_level, mapfile, variable, metric, prefix)
    cmd = [script] + [str(arg) for arg in positional]
    return run_command(cmd, shell=shell)