from cogent.parse.tree import DndParser
from cogent.maths.unifrac.fast_tree import UniFracTreeNode
#from csmat import dict_to_csmat
from sparse_unifrac.unifraccsmat import unifrac_mix, unifrac_mix_weighted, sum_dict

from cogent.util.option_parsing import (parse_command_line_parameters,
                                        make_option)

script_info = {}
script_info['brief_description'] = """Calculate unifrac on one otu table """
script_info['script_description'] = ""
script_info['script_usage'] = [("","","")]
script_info['output_description']= ""
script_info['required_options'] = [
 make_option('-i', '--input_path',
     help='Input OTU table in biom format or input directory containing OTU ' +\
     'tables in biom format for batch processing.',
     type='existing_path'),
 make_option('-t', '--tree_path', default=None,
     help='Input newick tree filepath, which is required when phylogenetic' +\
     ' metrics are specified. ',
     type='existing_filepath'),
]
script_info['optional_options'] = [
 make_option('-o', '--output_dir',
     help="Output directory. One will be created if it doesn't exist.",
     type='new_dirpath'),
 make_option('-m', '--metrics', default='unweighted',
     help='Metric to use. Unweighted (default) or weighted'),
]
script_info['version'] = __version__
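# The examples in this collection define ``script_info`` but are generally
# truncated before the driver code.  As a hedged sketch (not the original
# script), a typical main() wired up with parse_command_line_parameters might
# look like the following; the option names simply mirror the script_info
# above, and opts.verbose is the standard option the parser adds (as used in
# Example #29 below):
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    if opts.verbose:
        print 'Input OTU table(s): %s' % opts.input_path
        print 'Tree: %s' % opts.tree_path


if __name__ == "__main__":
    main()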
Example #2
from picrust.evaluate_test_datasets import calculate_accuracy_stats_from_observations
from biom.parse import parse_biom_table
from picrust.util import make_output_dir_for_file
from biom.table import table_factory,DenseOTUTable,SparseOTUTable
from random import shuffle

script_info = {}
script_info['brief_description'] = "Compare the accuracy of biom files (expected and observed) either by observations (default) or by samples."
script_info['script_description'] =\
    """ """
script_info['script_usage'] = [\
    ("Example 1","Compare an observed table to an expected table using relative abundance","%prog -e expected_ra.biom -o compare_results_ra.tab observed_ra.biom"),
    ("Example 2","Compare an observed table to an expected table using real counts","%prog --not_relative_abundance -e expected.biom -o compare_results.tab observed.biom")]
script_info['output_description']= "Outputs will be tab delimited file with various accuracy metrics."
script_info['required_options'] = [
 make_option('-e','--exp_trait_table_fp',type="existing_filepath",help='the expected trait table (biom format)'),\
 make_option('-o','--output_fp',type="new_filepath",help='the output file'),
]
script_info['optional_options'] = [
  make_option('-c','--compare_observations',action="store_true",default=False,help='Calculate accuracy values by comparing between observations (instead of between samples) [default: %default]'),\
  make_option('-n','--normalize',action="store_true",default=False,help='Convert both expected and observed tables to relative abundances (instead of observations) [default: %default]'),
  make_option('-l','--limit_to_expected_observations',action="store_true",default=False,help='Ignore observations that are not in the expected table [default: %default]'),
  make_option('--limit_to_observed_observations',action="store_true",default=False,help='Ignore observations that are not in the observed table [default: %default]'),
  make_option('-s','--shuffle_samples',action="store_true",default=False,help='Shuffle sample ids randomly before measuring accuracy [default: %default]'),
  make_option('--not_relative_abundance_scores',action="store_true",default=False,help='Round numbers (instead of taking ceil(), which is used for RA) before calculating TP,FP,FN,TN [default: %default]')
]
script_info['disallow_positional_arguments'] = False
script_info['version'] = __version__
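# A rough, hypothetical sketch of the TP/FP/FN/TN counting that the
# --not_relative_abundance_scores help text alludes to: observed and expected
# values are turned into presence/absence calls (ceil() for relative
# abundances, round() for raw counts) and then compared per observation.
# Illustrative only; this is not picrust.evaluate_test_datasets code.
from math import ceil

def confusion_counts(expected, observed, relative_abundance=True):
    """Count TP/FP/FN/TN between two {observation_id: value} dicts."""
    binarize = ceil if relative_abundance else round
    tp = fp = fn = tn = 0
    for obs_id in set(expected) | set(observed):
        exp_present = binarize(expected.get(obs_id, 0.0)) > 0
        obs_present = binarize(observed.get(obs_id, 0.0)) > 0
        if exp_present and obs_present:
            tp += 1
        elif obs_present:
            fp += 1
        elif exp_present:
            fn += 1
        else:
            tn += 1
    return tp, fp, fn, tn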

def transpose_biom(table):
Example #3
from datetime import date
from md_menagerie.calc_time_interval import string_to_date 

script_info = {}
script_info['brief_description'] = "Given a mapping file with metadata specifying a start date, end date and sample date, generate a new metadata file with a treatment column modified according to whether samples fall before, during, or after treatment."
script_info['script_description'] =\
"""
"""

script_info['script_usage'] = [("","","")]
script_info['output_description']= "Output is metadata table (for use with add_metadata_to_mapping_file.py)"
script_info['required_options'] = [\
make_option('-i','--input_mapping_file',type="existing_filepath",\
  help='the input QIIME format mapping file '),\
  make_option('--start_time_column',\
  help='column that specifies the start time for the experiment in yyyymmdd format.'),\
  make_option('--sample_time_column',\
  help='column that specifies the sample time for that specific sample in yyyymmdd format.'),\
  make_option('--end_time_column',\
  help='column that specifies the time the experiment ended, in yyyymmdd format.'),\
]
script_info['optional_options'] = [\
  make_option('-o','--output_mapping_file',type="new_filepath",\
   default=None,help='the output filepath for the new mapping file, updated with metadata [default: based on input filename]'),\
  make_option('--pre_suffix',default='_pretreatment',
      help='suffix for pre-treatment samples [Default:%default]'),\
  make_option('--post_suffix',default='_posttreatment',
      help='suffix for post-treatment samples [Default:%default]'),\
]
script_info['version'] = __version__
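# A hypothetical sketch of the classification logic described above: given
# the start, end and sample dates read from the mapping file, decide which
# suffix to append to the treatment value.  Dates are parsed here with
# datetime.strptime for self-containment; the real script imports
# string_to_date from md_menagerie.calc_time_interval instead.
from datetime import datetime

def treatment_suffix(sample_ymd, start_ymd, end_ymd,
                     pre_suffix='_pretreatment', post_suffix='_posttreatment'):
    """Return the suffix for a sample falling before, during, or after treatment."""
    parse = lambda s: datetime.strptime(s, '%Y%m%d').date()
    sample, start, end = parse(sample_ymd), parse(start_ymd), parse(end_ymd)
    if sample < start:
        return pre_suffix
    if sample > end:
        return post_suffix
    return ''  # sample taken during treatment; leave the treatment value unchanged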

Example #4
#from qiime.util import load_qiime_config

#qiime_config = load_qiime_config()


script_info = {}
script_info['brief_description'] = "Starts multiple jobs in parallel on multicore or multiprocessor systems."
script_info['script_description'] = "This script is designed to start multiple jobs in parallel on systems with no queueing system, for example a multiple processor or multiple core laptop/desktop machine. This also serves as an example 'cluster_jobs' which users can use a template to define scripts to start parallel jobs in their environment."
script_info['script_usage'] = [\
 ("Example",\
 "Start each command listed in test_jobs.txt in parallel. The run id for these jobs will be RUNID. ",\
 "%prog -ms test_jobs.txt RUNID")]
script_info['output_description']= "No output is created."
script_info['required_options'] = []
script_info['optional_options'] = [\
 make_option('-m','--make_jobs',action='store_true',\
         help='make the job files [default: %default]'),\
 make_option('-s','--submit_jobs',action='store_true',\
         help='submit the job files [default: %default]'),\
 make_option('-d','--delay',action='store',type='int',default=0,
             help='Number of seconds to pause between launching each job [default: %default]'),
 make_option('-n','--num_jobs',action='store',type='int',\
             help='Number of jobs to group commands into. [default: %default]',\
                default=4)\
]
script_info['version'] = __version__
script_info['disallow_positional_arguments'] = False

def write_job_files(output_dir,commands,run_id,num_jobs=4):
    jobs_dir = '%s/jobs/' % output_dir
    job_fps = []
    if not exists(jobs_dir):
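        pass  # (body truncated in the original example)

# write_job_files is cut off above.  As a rough sketch of the behaviour its
# options describe (group the input commands into --num_jobs shell scripts
# under <output_dir>/jobs/), the grouping step might look like the following.
# This is an illustration, not the original PICRUSt/QIIME implementation.
def group_commands(commands, num_jobs=4):
    """Split a list of command strings into at most num_jobs roughly even groups."""
    groups = [[] for _ in range(num_jobs)]
    for i, cmd in enumerate(commands):
        groups[i % num_jobs].append(cmd)
    return [g for g in groups if g]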
Example #5
script_info = {}
script_info['brief_description'] = "Calculate NRI and NTI using formulas from Phylocom 4.2/3.41"
script_info['script_description'] = "This script calculates NRI and NTI from a path to a Newick formatted tree and a path to a comma separated list of ids in that tree that form the group whose NRI/NTI you want to test. The tree is not required to have distances. If none are found script will use the number of nodes (self inclusive) as their distance from one another. NRI and NTI are calculated as described in the Phylocom manual, not as in Webb 2002, or Webb 2000. The Phylocom manual is freely available on the web and Webb 2002 can be found in the Annual Review of Ecology and Systematics: Phylogenies and Community Ecology Webb 2002."
script_info['script_usage'] = [\
    ("Calculate both NRI and NTI from the given tree and group of taxa:",
     "",
     "%prog -t gg_tree.tre -i ids.txt -m nri,nti"),
    ("Calculate only NRI:",
     "",
     "%prog -t gg_tree.tre -i ids.txt -m nri"),
    ("Calculate only NTI using a different number of iterations:",
     "",
     "%prog -t gg_tree.tre -i ids.txt -m nti -i 100")]
script_info['output_description']= "Outputs a value for specified tests"
script_info['required_options'] = [\
 make_option('-t','--tree_fp',type="existing_filepath",help='the tree filepath'),
 make_option('-g','--taxa_fp',type="existing_filepath",help='taxa list filepath')]
script_info['optional_options'] = [\
 make_option('-i','--iters',type="int",default=1000,help='number of iterations to use for sampling tips without replacement (null model 2 community sampling, see http://bodegaphylo.wikispot.org/Community_Phylogenetics). [default: %default]'),
 make_option('-m','--methods',type='string', default='nri,nti',help='comma-separated list of metrics to calculate. [default: %default]')]
script_info['version'] = __version__
script_info['help_on_no_arguments'] = True


def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    tr = parse_newick(open(opts.tree_fp),PhyloNode)
    tip_dists, all_nodes = tr.tipToTipDistances() # tipTo returns a list of actual node objects
    all_ids = [node.Name for node in all_nodes]
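# main() is truncated above.  A rough sketch of how NRI could be computed
# from the tip-to-tip distance matrix, following the Phylocom-style
# formulation mentioned in the script description (null model: draw the same
# number of tips without replacement).  Hypothetical helper, not the original
# implementation; NTI is analogous but uses mean nearest-taxon distance
# instead of mean pairwise distance.
from random import sample
from numpy import mean, std, triu_indices

def nri(tip_dists, group_idx, iters=1000):
    """Net relatedness index for the tips indexed by group_idx (a list of row indices)."""
    def mpd(idx):
        sub = tip_dists[idx][:, idx]
        return mean(sub[triu_indices(len(idx), k=1)])
    n_tips = tip_dists.shape[0]
    obs = mpd(group_idx)
    null = [mpd(sample(range(n_tips), len(group_idx))) for _ in range(iters)]
    return -1.0 * (obs - mean(null)) / std(null)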
Example #6
script_info = {}
script_info['brief_description'] = "Perform missing data interpolation on a QIIME mapping file."
script_info['script_description'] =\
"""
This script assists in interpolating missing data for QIIME mapping files.  This is used in cases where some column values are unknown, but are predictable from known column values.  For example, algal cover measurements may be generally stable over the course of weeks, but measured only quarterly.  This script would allow interpolation of values for the weeks in which no measurement was taken.

"""

script_info['script_usage'] = [("","Generate a 3d plot of temperature x month x equitability for control samples found in the mapping file herbivore_mapping_r15_with_alpha.txt.  Save to output file 3d_plot_test.png",\
        "%prog -m ./test_script_data/herbivore_mapping_r15_with_alpha.txt -c 'HCOM_temp_5m,month,equitability_even_500_alpha' -f 'treatment:Control' -o ./test_script_data/3d_plot_test.png")]
script_info['output_description']= "Output is a new QIIME mapping file, with interpolated data values added."
script_info['required_options'] = [\
make_option('-m','--input_mapping_file',type="existing_filepath",\
  help='the input QIIME format mapping file.'),\
  make_option('-y','--interpolation_columns',\
  help='A comma-separated list of metadata headers to interpolate (i.e. the y axis in a linear regression)'),\
  make_option('-x','--reference_column',\
  help='A reference column over which to interpolate (i.e. the x axis in a linear regression)'),\
]
script_info['optional_options'] = [\
 make_option('-s','--split_col',default=None,
    help="Name of columns to use in splitting up the dataset before interpolation (e.g. interpolate only within data that share values for all of these columns).  If provided, the table will be split based on all unique values of this column and results interpolated within each, then merged back into a single output [default:%default]"),\
 make_option('-o','--output_file',type="new_filepath",\
   default='interpolated_vals.txt',help='the output filepath for interpolated values.[default: %default]'),\
]

script_info['version'] = __version__
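# A minimal sketch of the interpolation step described above: for rows where
# a y-column value is missing, estimate it by linear interpolation against
# the reference (x) column using numpy.interp.  Hypothetical helper for
# illustration; the original script's implementation is not shown here.
from numpy import interp

def interpolate_column(x_values, y_values, missing=''):
    """Fill missing entries of y_values (parallel to x_values) by linear interpolation."""
    known = [(float(x), float(y)) for x, y in zip(x_values, y_values) if y != missing]
    known.sort()
    xs = [x for x, _ in known]
    ys = [y for _, y in known]
    return [float(y) if y != missing else interp(float(x), xs, ys)
            for x, y in zip(x_values, y_values)]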

 

if __name__ == "__main__":
Example #7
__credits__ = ["Jose Antonio Navas Molina"]
__license__ = "GPL"
__version__ = "1.4.0-dev"
__maintainer__ = "Jose Antonio Navas Molina"
__email__ = "*****@*****.**"
__status__ = "Development"

from cogent.util.option_parsing import parse_command_line_parameters, make_option
from tgz_manager import compress_to_tgz

script_info = {}
script_info['brief_description'] = "Generate a tgz file which contains the desired path compressed"
script_info['script_description'] = """If input_path is a file: generate a tgz file with the input file compressed.
If input_path is a directory: generate a tgz file with the content of the input directory compressed."""
script_info['script_usage'] = [("Example:", "Generate a tgz file named 'out.tgz' which contains the content of the directory 'in_dir'", "%prog -i in_dir -o out.tgz")]
script_info['output_description'] = ""
script_info['required_options'] = [
	make_option('-i', '--input_path', type="existing_path",
				help='Path to the directory or file to compress'),
	make_option('-o', '--output_tgz', type="new_filepath",
				help='File path of the output tgz file')
]
script_info['optional_options'] = []
script_info['version'] = __version__

if __name__ == '__main__':
	option_parser, opts, args = parse_command_line_parameters(**script_info)
	input_path = opts.input_path
	tgz_fp = opts.output_tgz

	compress_to_tgz(input_path, tgz_fp)
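# compress_to_tgz is imported from the local tgz_manager module and is not
# shown.  A plausible sketch using the standard-library tarfile module, under
# the behaviour described above (a single file or a whole directory is packed
# into a gzipped tar archive); an assumption only, not the original module.
import os
import tarfile

def compress_to_tgz_sketch(input_path, tgz_fp):
    """Write input_path (file or directory) into a gzipped tar archive."""
    with tarfile.open(tgz_fp, 'w:gz') as tar:
        tar.add(input_path, arcname=os.path.basename(os.path.normpath(input_path)))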
Example #8
from cogent.app.parameters import ValuedParameter, FlagParameter, \
       MixedParameter
from cogent.app.util import CommandLineApplication, FilePath, system, \
       CommandLineAppResult, ResultPath, remove, ApplicationError
from subprocess import Popen, PIPE
from datetime import datetime

script_info = {}
script_info[
    'brief_description'] = "An application controller for the BayesTraits program (Pagel & Meade)"
script_info['script_description'] = ""
script_info['script_usage'] = [("", "", "")]
script_info[
    'output_description'] = "Outputs 1) A table of reconstructions 2)A log file"
script_info['required_options'] = [\
  make_option('-t','--input_tree',type="existing_filepath",help='the input tree file in Newick format'),\
  make_option('-d','--input_trait_data',type="existing_filepath",help='the input trait table in Trait Table format')]
script_info['optional_options'] = [\
 # Example optional option

 make_option('--debug',action="store_true",default=False,help='display verbose output [default: %default]')\
]
script_info['version'] = __version__
script_info['help_on_no_arguments'] = False


class BayesTraits(CommandLineApplication):
    """BayesTraits application Controller"""

    _command = 'BayesTraits'
    _input_handler = '_input_as_lines'
Example #9
from picrust.util import write_biom_table

script_info = {}
script_info['brief_description'] = "Collapse table data to a specified level in a hierarchy."
script_info['script_description'] = "This script collapses hierarchical data to a specified level. For instance, often it is useful to examine KEGG results from a higher level within the pathway hierarchy. Many genes are sometimes involved in multiple pathways, and in these circumstances (also know as a one-to-many relationship), the gene is counted for each pathway. This has a side effect of increasing the total count of genes in the table."
script_info['script_usage'] = [\
("","Collapse predicted metagenome using KEGG Pathway metadata.","""%prog -i predicted_metagenomes.biom -c KEGG_Pathways -l 3 -o predicted_metagenomes.L3.biom"""),\
("","Change output to tab-delimited format (instead of BIOM).","""%prog -f -i predicted_metagenomes.biom -c KEGG_Pathways -l 3 -o predicted_metagenomes.L3.txt"""),\
("","Collapse COG Categories.","""%prog -i cog_predicted_metagenomes.biom -c COG_Category -l 2 -o cog_predicted_metagenomes.L2.biom"""),\
("","Collapse predicted metagenome using taxonomy metadata (not one-to-many).","""%prog -i observation_table.biom -c taxonomy -l 1 -o observation_table.L1.biom"""),\


]
script_info['output_description']= "Output table is contains gene counts at a higher level within a hierarchy."
script_info['required_options'] = [\
 make_option('-i','--input_fp',type="existing_filepath",help='the predicted metagenome table'),\
 make_option('-o','--output_fp',type='new_filepath', help='the resulting table'),
 make_option('-c','--metadata_category',type='string',help='the metadata category that describes the hierarchy (e.g. KEGG_Pathways, COG_Category, etc.). Note: RFAM predictions can not be collapsed because there are no categories to group them into.'),
 make_option('-l','--level',type='int',help='the level in the hierarchy to collapse to. A value of 0 is not allowed, a value of 1 is the highest level, and any larger value is closer to the leaves of the hierarchy. For instance, if the hierarchy contains 4 levels, specifying 3 would collapse at one level above being fully specified.')
]
script_info['optional_options'] = [
 make_option('--ignore',type='string',default=None, help="Ignore the comma separated list of names. For instance, specifying --ignore unknown,unclassified will ignore those labels while collapsing. The default is to not ignore anything. [default: %default]"),
 make_option('-f','--format_tab_delimited',action="store_true",default=False,help='output the predicted metagenome table in tab-delimited format [default: %default]')]
script_info['version'] = __version__

def make_collapse_f(category, level, ignore):
    """produce a collapsing function for one-to-many relationships"""
    # adjust level such that, for instance, level 1 corresponds to index 0
    if level > 0:
        level -= 1
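    # (remainder of make_collapse_f truncated in the original example)

# A rough sketch of the one-to-many collapse described in the script
# description: each observation contributes its full count to every category
# it maps to at the requested level, which is why collapsed totals can exceed
# the original totals.  Plain-dict illustration only; the real script
# collapses a BIOM table via its observation metadata instead.
def collapse_counts(counts, categories_by_obs, level, ignore=None):
    """counts: {obs_id: count}; categories_by_obs: {obs_id: [hierarchy path, ...]}."""
    ignore = set(ignore or [])
    collapsed = {}
    for obs_id, count in counts.items():
        for path in categories_by_obs.get(obs_id, []):
            label = path[level - 1]  # level 1 == top of the hierarchy
            if label in ignore:
                continue
            collapsed[label] = collapsed.get(label, 0) + count
    return collapsed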
Example #10
                                "%prog -o out_directory")]

script_info['output_description'] = "X = [Headers, Data]"

#!!! ADD REQUIRED ARGUMENTS HERE !!!#
script_info['required_options'] = []

#!!! ADD OPTIONAL ARGUMENTS HERE !!!#
script_info['optional_options'] = [
    ### FOR INPUT FILE ###
    # make_option('-short_argument','--long_argument',default=None,type="existing_filepath",help='the input filepath'),
    ### FOR OUTPUT DIRECTORY ###
    # make_option('-short_argument','--long_argument',default=None,type="new_dirpath",help='the output dirpath'),
    make_option('-i',
                '--input_file',
                default=None,
                type="existing_filepath",
                help='the biom table filepath'),
    make_option('-m',
                '--map_file',
                default=None,
                type="existing_filepath",
                help='the map filepath'),
    make_option('-o',
                '--output_file',
                default=None,
                type="new_dirpath",
                help='the output path for pdf'),
]

script_info['version'] = __version__
Example #11
script_info = {
    'brief_description':
    'Downloads PICRUSt pre-calculated files.',
    'script_description':
    ('Downloads PICRUSt pre-calculated files to the data directory'
     ' ({}).'.format(DATA_DIR)),
    'script_usage': [('', 'Download default pre-calculated files:', '%prog')],
    'output_description':
    ('Prints the result of the download attempt to the screen (STDOUT).'),
    'required_options': [],
    'optional_options': [
        make_option('-t',
                    '--type_of_prediction',
                    default=type_of_prediction_choices[0],
                    type="choice",
                    choices=type_of_prediction_choices,
                    help='Type of functional predictions. Valid choices are:'
                    ' {choices} [default: %default]'.format(
                        choices=', '.join(type_of_prediction_choices))),
        make_option(
            '-g',
            '--gg_version',
            default=gg_version_choices[0],
            type="choice",
            choices=gg_version_choices,
            help='Version of GreenGenes that was used for OTU picking. Valid'
            ' choices are: {choices} [default: %default]'.format(
                choices=', '.join(gg_version_choices))),
        make_option(
            '--with_confidence',
            default=False,
Example #12
script_info = {}
script_info[
    'brief_description'] = "This script converts metagenomic relative abundance back to sequence counts, by scaling the relative abundnace of each gene in each sample in a biom file by a user-supplied sequencing depth"
script_info['script_description'] = ""
script_info['script_usage'] = [
    ("", "Predict metagenomes from genomes.biom and otus.biom.",
     "%prog -i otus.biom -s sample_scaling.tsv -o scaled_otus.biom")
]
script_info[
    'output_description'] = "A new biom file where each sample has been scaled accordingly."
script_info['required_options'] = [
    make_option(
        '-s',
        '--input_seq_depth_file',
        type='existing_filepath',
        help=
        'an input tab-delimited table, with samples as the first column and an integer sequencing depth as the second'
    ),
    make_option(
        '-i',
        '--input_count_table',
        type="existing_filepath",
        help=
        'the input trait counts on per otu basis in biom format (can be gzipped)'
    ),
    make_option('-o',
                '--output_metagenome_table',
                type="new_filepath",
                help='the output file for the scaled metagenome')
]
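# The example ends at the required options.  A sketch of the scaling step the
# brief description refers to (multiply each sample's relative abundances by
# that sample's sequencing depth and round to integer counts); a hypothetical
# helper, not PICRUSt's actual implementation.
def scale_sample(relative_abundances, depth):
    """Convert one sample's {gene_id: relative_abundance} to integer counts."""
    return dict((gene_id, int(round(ra * depth)))
                for gene_id, ra in relative_abundances.items())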
Example #13
from cogent.parse.tree import DndParser
from cogent.maths.unifrac.fast_tree import UniFracTreeNode
#from csmat import dict_to_csmat
from sparse_unifrac.unifraccsmat import unifrac_mix, unifrac_mix_weighted, sum_dict

from cogent.util.option_parsing import (parse_command_line_parameters,
                                        make_option)

script_info = {}
script_info['brief_description'] = """Calculate unifrac on one otu table """
script_info['script_description'] = ""
script_info['script_usage'] = [("", "", "")]
script_info['output_description'] = ""
script_info['required_options'] = [
    make_option('-i', '--input_path',
     help='Input OTU table in biom format or input directory containing OTU ' +\
     'tables in biom format for batch processing.',
     type='existing_path'),
    make_option('-t', '--tree_path', default=None,
     help='Input newick tree filepath, which is required when phylogenetic' +\
     ' metrics are specified. ',
     type='existing_filepath'),
]
script_info['optional_options'] = [
    make_option(
        '-o',
        '--output_dir',
        help="Output directory. One will be created if it doesn't exist.",
        type='new_dirpath'),
    make_option('-m',
                '--metrics',
                default='unweighted',
Example #14
from picrust.ace import ace_for_picrust
from picrust.ancestral_state_reconstruction import run_asr_in_parallel
from picrust.util import make_output_dir_for_file, make_output_dir

script_info = {}
script_info[
    'brief_description'] = "Runs ancestral state reconstruction given a tree and trait table"
script_info['script_description'] = "\
Provides a common interface for running various ancestral state reconstruction methods (e.g. ACE, BayesTraits, etc.)."
script_info['script_usage'] = [\
("Example 1","Provide a tree file and trait table file:","%prog -i trait_table.tab -t pruned_tree.newick -o asr_counts.tab -c asr_ci.tab")]
script_info[
    'output_description'] = "A table containing trait information for internal nodes of the tree."

script_info['required_options'] = [\
make_option('-t','--input_tree_fp',type="existing_filepath",help='the tree to use for ASR'),\
make_option('-i','--input_trait_table_fp',type="existing_filepath",help='the trait table to use for ASR'),\
]
asr_method_choices = ['ace_ml', 'ace_reml', 'ace_pic', 'wagner']
parallel_method_choices = ['sge', 'torque', 'multithreaded']

script_info['optional_options'] = [\
make_option('-m','--asr_method',type='choice',
                help='Method for ancestral state reconstruction. Valid choices are: '+\
                ', '.join(asr_method_choices) + ' [default: %default]',\
                choices=asr_method_choices,default='ace_pic'),\
make_option('-o','--output_fp',type="new_filepath",help='output trait table [default:%default]',default='asr_counts.tab'),\
make_option('-c','--output_ci_fp',type="new_filepath",help='output table containing 95% confidence intervals, loglik, and brownian motion parameters for each asr prediction [default:%default]',default='asr_ci.tab'),\
make_option('-p','--parallel',action="store_true",help='allow parallelization of asr',default=False),\
make_option('-j','--parallel_method',type='choice',
                help='Method for parallelization. Valid choices are: '+\
                ', '.join(parallel_method_choices) + ' [default: %default]',\
Example #15
from cogent import LoadTree
from cogent.util.option_parsing import parse_command_line_parameters, make_option
from cogent.app.parameters import ValuedParameter, FlagParameter, \
       MixedParameter
from cogent.app.util import CommandLineApplication, FilePath, system, \
       CommandLineAppResult, ResultPath, remove, ApplicationError
from subprocess import Popen,PIPE
from datetime import datetime

script_info = {}
script_info['brief_description'] = "An application controller for the BayesTraits program (Pagel & Meade)"
script_info['script_description'] = ""
script_info['script_usage'] = [("","","")]
script_info['output_description']= "Outputs 1) A table of reconstructions 2)A log file"
script_info['required_options'] = [\
  make_option('-t','--input_tree',type="existing_filepath",help='the input tree file in Newick format'),\
  make_option('-d','--input_trait_data',type="existing_filepath",help='the input trait table in Trait Table format')]
script_info['optional_options'] = [\
 # Example optional option
 make_option('--debug',action="store_true",default=False,help='display verbose output [default: %default]')\
]
script_info['version'] = __version__
script_info['help_on_no_arguments'] = False



class BayesTraits(CommandLineApplication):
    """BayesTraits application Controller"""

    _command = 'BayesTraits'
    _input_handler = '_input_as_lines'
Example #16
  'random_neighbor']
WEIGHTING_CHOICES = ['exponential', 'linear', 'equal']
CONFIDENCE_FORMAT_CHOICES = ['sigma', 'confidence_interval']

#Add script information
script_info['script_usage'] = [\
("","Required options with NSTI:","%prog -a -i trait_table.tab -t reference_tree.newick -r asr_counts.tab -o predict_traits.tab"),\
("","Limit predictions to particular tips in OTU table:","%prog -a -i trait_table.tab -t reference_tree.newick -r asr_counts.tab -o predict_traits_limited.tab -l otu_table.tab"),
("","Reconstruct confidence","%prog -a -i trait_table.tab -t reference_tree.newick -r asr_counts.tab -c asr_ci.tab -o predict_traits.tab")
                               ]
#Define commandline interface
script_info[
    'output_description'] = "Output is a table (tab-delimited or .biom) of predicted character states"
script_info['required_options'] = [\
make_option('-i','--observed_trait_table',type="existing_filepath",\
  help='the input trait table describing directly observed traits (e.g. sequenced genomes) in tab-delimited format'),\
make_option('-t','--tree',type="existing_filepath",\
  help='the full reference tree, in Newick format')
                                   ]
script_info['optional_options'] = [\
 make_option('-o','--output_trait_table',type="new_filepath",\
   default='predicted_states.tsv',help='the output filepath for trait predictions [default: %default]'),\
 make_option('-a','--calculate_accuracy_metrics',default=False,action="store_true",\
   help='if specified, calculate accuracy metrics (i.e. how accurate does PICRUSt expect its predictions to be?) and add to output file [default: %default]'),\
 make_option('--output_accuracy_metrics_only',type="new_filepath",\
   default=None,help='if specified, calculate accuracy metrics (e.g. NSTI), output them to this filepath, and do not do anything else. [default: %default]'),\

 make_option('-m','--prediction_method',default='asr_and_weighting',choices=METHOD_CHOICES,help='Specify the prediction method to use.  The recommended prediction method is set as default, so other options are primarily useful for control experiments and methods validation, not typical use.  Valid choices are:'+",".join(METHOD_CHOICES)+'.  "asr_and_weighting" (recommended): use ancestral state reconstructions plus local weighting with known tip nodes.  "nearest_neighbor": predict the closest tip on the tree with trait information.  "random_neighbor": predict a random tip on the tree with trait information. "asr_only": predict the traits of the last reconstructed ancestor, without weighting. "weighting_only": weight all genomes by distance to the organism of interest using the specified weighting function and predict the weighted average.   [default: %default]'),\

 make_option('-w','--weighting_method',default='exponential',choices=WEIGHTING_CHOICES,help='Specify the weighting function to use.  This only applies to prediction methods that incorporate local weighting ("asr_and_weighting" or "weighting_only").  The recommended weighting method is set as default, so other options are primarily useful for control experiments and methods validation, not typical use.  Valid choices are:'+",".join(WEIGHTING_CHOICES)+'.  "exponential" (recommended): weight genomes as a negative exponent of distance.  That is 2^-d, where d is the tip-to-tip distance from the genome to the tip.  "linear": weight tips as a linear function of distance, normalized to the maximum possible distance (max_d -d)/d. "equal": set all weights to a constant (ignoring branch length).   [default: %default]'),
make_option('-l','--limit_predictions_by_otu_table',type="existing_filepath",help='Specify a valid path to a legacy QIIME OTU table to perform predictions only for tips that are listed in the OTU table (regardless of abundance)'),\
 make_option('-g','--limit_predictions_to_organisms',help='Limit predictions to specific, comma-separated organism ids. (Generally only useful for lists of < 10 organism ids, for example when performing leave-one-out cross-validation).'),\
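# (remaining options truncated in the original example)

# A sketch of the exponential weighting described in the -m/-w help text:
# each annotated genome contributes to the prediction with weight 2^-d,
# where d is its tip-to-tip distance to the organism of interest, and the
# prediction is the weighted average of its trait values.  Illustrative
# only; this is not PICRUSt's predict_traits implementation.
def weighted_average_prediction(trait_values_by_distance):
    """trait_values_by_distance: list of (distance, trait_value) pairs."""
    weights = [(2.0 ** -d, v) for d, v in trait_values_by_distance]
    total = sum(w for w, _ in weights)
    return sum(w * v for w, v in weights) / total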
Example #17
__license__ = "GPL"
__version__ = "1.4.0-dev"
__maintainer__ = "Jose Antonio Navas Molina"
__email__ = "*****@*****.**"
__status__ = "Development"

from cogent.util.option_parsing import parse_command_line_parameters, make_option
from xml_generator import make_xml

script_info = {}
script_info['brief_description'] = "Generates a Galaxy XML file from a given QIIME script"
script_info['script_description'] = "Reads the input script, looks for his 'script_info' and extract all the information necessary to generate the XML file. The script generated takes the same name as the original script, but changing his extension to XML. Once the XML is generated, the script must be put into the Galaxy's tools folder and edit tool_conf.xml "
script_info['script_usage'] = [("Example:", "Generate the Galaxy XML file from the script 'my_script.py' without including the '--opt_a' and the '--opt_b' options.", "%prog -i my_script.py -r opt_a,opt_b")]
script_info['output_description'] = "An XML file that Galaxy can reads and make the tool available via web browser"
script_info['required_options'] = [
	make_option('-i', '--script_fp', type="existing_filepath",
				help='the QIIME python script filepath to generate'),
	make_option('-o', '--output_dir', type="existing_dirpath",
				help='output directory where to save the XML file')
]
script_info['optional_options'] = [
	make_option('-r', '--remove_opts', type="string",
				help='List of option names (e.g. "option1,option2") that will not appear in the xml'),
]
script_info['version'] = __version__

if __name__ == '__main__':
	option_parser, opts, args = parse_command_line_parameters(**script_info)
	script_fp = opts.script_fp
	output_dir = opts.output_dir
	remove_opts = opts.remove_opts
Example #18
from picrust.predict_metagenomes import transfer_observation_metadata
from os import path
from os.path import join
from picrust.util import get_picrust_project_dir, convert_precalc_to_biom,make_output_dir_for_file, write_biom_table
import gzip

script_info = {}
script_info['brief_description'] = "Normalize an OTU table by marker gene copy number"
script_info['script_description'] = ""
script_info['script_usage'] = [
("","Normalize the OTU abundances for a given OTU table picked against the newest version of Greengenes:","%prog -i closed_picked_otus.biom -o normalized_otus.biom"),
("","Change the version of Greengenes used for OTU picking:","%prog -g 18may2012 -i closed_picked_otus.biom -o normalized_otus.biom")
]
script_info['output_description']= "A normalized OTU table"
script_info['required_options'] = [
 make_option('-i','--input_otu_fp',type="existing_filepath",help='the input otu table filepath in biom format'),
 make_option('-o','--output_otu_fp',type="new_filepath",help='the output otu table filepath in biom format'),
]
gg_version_choices=['13_5','18may2012']
script_info['optional_options'] = [
    make_option('-g','--gg_version',default=gg_version_choices[0],type="choice",\
                    choices=gg_version_choices,\
                    help='Version of GreenGenes that was used for OTU picking. Valid choices are: '+\
                    ', '.join(gg_version_choices)+\
                    ' [default: %default]'),

    make_option('-c','--input_count_fp',default=None,type="existing_filepath",\
                    help='Precalculated input marker gene copy number predictions on per otu basis in biom format (can be gzipped). Note: using this option overrides --gg_version. [default: %default]'),
    make_option('--metadata_identifer',
             default='CopyNumber',
             help='identifier for copy number entry as observation metadata [default: %default]'),
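# (remaining options truncated in the original example)

# A sketch of the normalization this script performs: divide each OTU's
# abundance by that OTU's predicted marker-gene (e.g. 16S) copy number.
# Plain-dict illustration under that assumption; the real script operates on
# BIOM tables via the picrust.util helpers imported above.
def normalize_by_copy_number(otu_counts, copy_numbers):
    """otu_counts: {otu_id: count}; copy_numbers: {otu_id: copy number}."""
    return dict((otu_id, count / float(copy_numbers.get(otu_id, 1.0)))
                for otu_id, count in otu_counts.items())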
Example #19
from cogent.util.misc import app_path
from cogent.app.util import get_tmp_filename
from picrust.make_cluster_jobs import make_torque_jobs, submit_cluster_jobs
from cogent.util.option_parsing import parse_command_line_parameters, make_option

script_info = {}
script_info['brief_description'] = "Starts multiple jobs in parallel on Torque/qsub based multiprocessor systems."
script_info['script_description'] = "This script is designed to start multiple jobs in parallel on cluster systems with a SGE/qsub based scheduling system."
script_info['script_usage'] = [\
 ("Example",\
 "Start each command listed in test_jobs.txt in parallel. The run id for these jobs will be RUNID. ",\
 "%prog -ms test_jobs.txt RUNID")]
script_info['output_description']= "No output is created."
script_info['required_options'] = []
script_info['optional_options'] = [\
    make_option('-m','--make_jobs',action='store_true',\
                    help='make the job files [default: %default]'),

    make_option('-s','--submit_jobs',action='store_true',\
                    help='submit the job files [default: %default]'),

    make_option('-q','--queue',action='store',\
                    type='string',dest='queue', \
                    help='name of queue to submit to '+\
                    ' [default: %default]'),

    make_option('-j','--job_dir', action='store',\
                    type='string',dest='job_dir',\
                    help='directory to store the jobs '+\
                    '[default: %default]', default="jobs/"),
    make_option('-n','--num_jobs',action='store',type='int',\
                help='Number of jobs to group commands into. [default: %default]',\
Example #20
script_info = {}
script_info[
    'brief_description'] = "Evaluate the accuracy of character predictions, given directories of expected vs. observed test results"
script_info['script_description'] =\
    """The script finds all paired expected and observed values in a set of directories and generates the following output: 1) data for a scatterplot of observed vs. expected values for each character (typically gene family count) within each organism (so one file per organism). 2) A summary of accuracy across all organisms.   
    character """
script_info['script_usage'] = [(
    "",
    "Evaluate the accuracy of all predictions in a folder, and output summary statistics.",
    "%prog -i obs_otu_table.biom -e exp_otu_table.txt -o./evaluation_results/")
                               ]
script_info[
    'output_description'] = "Outputs will be obs,exp data points for the comparison"
script_info['required_options'] = [
    make_option('-i','--trait_table_dir',type="existing_dirpath",help='the input trait table directory (files in biom format)'),\
 make_option('-e','--exp_trait_table_dir',type="existing_dirpath",help='the input expected trait table directory (files in biom format)'),\
 make_option('-o','--output_dir',type="new_dirpath",help='the output directory'),
]
script_info['optional_options'] = [
        make_option('-f','--field_order',\
                default='file_type,prediction_method,weighting_method,holdout_method,distance,organism',help='pass comma-separated categories, in the order they appear in file names.   Categories are "file_type","prediction_method","weighting_method","holdout_method" (randomization vs. holdout),"distance", and "organism".  Example:  "-f file_type,test_method,asr_method" specifies that files will be in the form: predict_traits--distance_exclusion--wagner.  Any unspecified values are set to "not_specified".  [default: %default]'),\
        make_option('-p','--pool_by',\
          default='distance',help='pass comma-separated categories to pool results by those metadata categories. Valid categories are: holdout_method, prediction_method,weighting_method,distance and organism. For example, pass "distance" to output results pooled by holdout distance in addition to holdout method and prediction method  [default: %default]')
]
script_info['version'] = __version__



def evaluate_test_dataset_dir(obs_dir_fp,exp_dir_fp,file_name_delimiter="--",\
        file_name_field_order=\
Example #21
METHOD_CHOICES = ['asr_and_weighting','nearest_neighbor','asr_only','weighting_only',\
  'random_neighbor']
WEIGHTING_CHOICES = ['exponential','linear','equal']
CONFIDENCE_FORMAT_CHOICES = ['sigma','confidence_interval']

#Add script information
script_info['script_usage'] = [\
("","Required options with NSTI:","%prog -a -i trait_table.tab -t reference_tree.newick -r asr_counts.tab -o predict_traits.tab"),\
("","Limit predictions to particular tips in OTU table:","%prog -a -i trait_table.tab -t reference_tree.newick -r asr_counts.tab -o predict_traits_limited.tab -l otu_table.tab"),
("","Reconstruct confidence","%prog -a -i trait_table.tab -t reference_tree.newick -r asr_counts.tab -c asr_ci.tab -o predict_traits.tab")
]
#Define commandline interface
script_info['output_description']= "Output is a table (tab-delimited or .biom) of predicted character states"
script_info['required_options'] = [\
make_option('-i','--observed_trait_table',type="existing_filepath",\
  help='the input trait table describing directly observed traits (e.g. sequenced genomes) in tab-delimited format'),\
make_option('-t','--tree',type="existing_filepath",\
  help='the full reference tree, in Newick format')
]
script_info['optional_options'] = [\
 make_option('-o','--output_trait_table',type="new_filepath",\
   default='predicted_states.tsv',help='the output filepath for trait predictions [default: %default]'),\
 make_option('-a','--calculate_accuracy_metrics',default=False,action="store_true",\
   help='if specified, calculate accuracy metrics (i.e. how accurate does PICRUSt expect its predictions to be?) and add to output file [default: %default]'),\
 make_option('--output_accuracy_metrics_only',type="new_filepath",\
   default=None,help='if specified, calculate accuracy metrics (e.g. NSTI), output them to this filepath, and do not do anything else. [default: %default]'),\

 make_option('-m','--prediction_method',default='asr_and_weighting',choices=METHOD_CHOICES,help='Specify the prediction method to use.  The recommended prediction method is set as default, so other options are primarily useful for control experiments and methods validation, not typical use.  Valid choices are:'+",".join(METHOD_CHOICES)+'.  "asr_and_weighting" (recommended): use ancestral state reconstructions plus local weighting with known tip nodes.  "nearest_neighbor": predict the closest tip on the tree with trait information.  "random_neighbor": predict a random tip on the tree with trait information. "asr_only": predict the traits of the last reconstructed ancestor, without weighting. "weighting_only": weight all genomes by distance to the organism of interest using the specified weighting function and predict the weighted average.   [default: %default]'),\

 make_option('-w','--weighting_method',default='exponential',choices=WEIGHTING_CHOICES,help='Specify the weighting function to use.  This only applies to prediction methods that incorporate local weighting ("asr_and_weighting" or "weighting_only").  The recommended weighting method is set as default, so other options are primarily useful for control experiments and methods validation, not typical use.  Valid choices are:'+",".join(WEIGHTING_CHOICES)+'.  "exponential" (recommended): weight genomes as a negative exponent of distance.  That is 2^-d, where d is the tip-to-tip distance from the genome to the tip.  "linear": weight tips as a linear function of distance, normalized to the maximum possible distance (max_d -d)/d. "equal": set all weights to a constant (ignoring branch length).   [default: %default]'),
 make_option('-l','--limit_predictions_by_otu_table',type="existing_filepath",help='Specify a valid path to a legacy QIIME OTU table to perform predictions only for tips that are listed in the OTU table (regardless of abundance)'),\
 make_option('-g','--limit_predictions_to_organisms',help='Limit predictions to specific, comma-separated organism ids. (Generally only useful for lists of < 10 organism ids, for example when performing leave-one-out cross-validation).'),\
Example #22
  """Reformats scripts and trait tables.  Optional fixes include:
        -- Add short (epsilon) branch lengths in place of 0 length branches
        -- Filter out taxa that don't match between tree and trait table
        -- Output tree in NEXUS format
        -- Ensure tree is bifurcating (remove polytomies using very short branches)
        -- Convert floating point trait values to integers
        -- Add a short branch length to the root branch (required by BayesTraits)
        -- Remove internal node names (required by BayesTraits)
        """

script_info['script_usage'] = [\
    ("Example 1","Reformat a tree and trait table with default options:","%prog -i traits.tab -t tree.nwk -o ./format_output/")]
script_info[
    'output_description'] = "Outputs a reformatted tree and trait table."
script_info['required_options'] = [\
          make_option('-t','--input_tree',type="existing_filepath",help='the input tree (Newick format)'),\
          make_option('-i','--input_trait_table',type="existing_filepath",help='the input trait table (QIIME OTU table format)')
                  ]

delimiter_choices = ['tab', 'space', 'comma']
script_info['optional_options'] = [\
          make_option('-m','--tree_to_trait_mapping',default=None,type="existing_filepath",help='a two-column, tab-delimited text file mapping identifiers in the tree (column 1) to identifiers in the trait table (column 2). If supplied, the identifiers in the trait table will be converted to match the identifiers in the tree. (This mapping does not need to be supplied if the tree and trait table already use a common set of identifiers.) [default: %default]'),\
          make_option('-o','--output_dir',default='./formatted/',type="new_filepath",help='the output directory [default: %default]'),\
          make_option('--input_table_delimiter',default='tab',type="choice",choices=delimiter_choices,\
            help='The character delimiting fields in the input trait table. Valid choices are:'+','.join(delimiter_choices)+' [default: %default]'),\
          make_option('--output_table_delimiter',default='tab',type="choice",choices=delimiter_choices,\
            help='The character delimiting fields in the output trait table. Valid choices are:'+','.join(delimiter_choices)+' [default: %default]'),\
          make_option('--suppress_bifurcating',default=False,action="store_true",help="If set, don't ensure that tree is fully bifurcating. [default: %default]"),\
          make_option('-n','--convert_to_nexus',default=False,action="store_true",help='Convert tree to NEXUS format, including a translate block mapping tip names to numbers. [default: %default]'),\
          make_option('-c','--convert_values_to_ints',default=False,action="store_true",help='Convert the values for each character state to integers. [default: %default]'),\
          make_option('--no_minimum_branch_length',default=False,action="store_true",help="If set, don't ensure all branches have at least a small but non-zero branchlength. [default: %default]"),\
Example #23
script_info = {}
script_info['brief_description'] = "GC Content"
script_info['script_description'] = "Count GC content for each sliding window. Input must be a fasta file in one single line.\n\n"\
"REQUIREMENTS: It uses the script infoseq, so you need to have it previously installed\n"\
"http://emboss.sourceforge.net/apps/cvs/emboss/apps/infoseq.html"
"It outputs a bed file with the following format:\n"\
    "chr1\tWindowStart\tWindowEnd\tGC%"
script_info['script_usage'] = [\
 ("",
  "Counts GC content with a sliding window defined by user",
  "%prog -f in.fasta -win int > outfile.bed")]
script_info['output_description']= "Tab file with the following format:"\
    "chr1\tWindowStart\tWindowEnd\tGC%"
script_info['required_options'] = [\
    make_option('-i','--fasta',type="existing_filepath",help='Fasta file to count the GC by sliding windows'),\
    make_option('-w','--window',help='(int) Sliding window size')
]

script_info['version'] = __version__


def count(args):
    win = int(args.window)
    fileInput = open(args.fasta, 'r')
    genome = fileInput.readlines()[1]
    fileOut = open("GC_density.csv", "w")

    for i in xrange(0,len(genome)-1, int(win)): 
         #print(i)
         subString = genome[i:i+win-1]
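         # (remainder of the loop truncated in the original example)

# A sketch of the per-window GC calculation the loop above is building
# towards: count G/C characters in the window and report the fraction.
# Illustrative helper under that assumption; the original script relies on
# the EMBOSS infoseq tool mentioned in the description.
def gc_fraction(window):
    """Return the GC fraction of a sequence window (0.0 if empty)."""
    window = window.upper()
    gc = window.count('G') + window.count('C')
    return gc / float(len(window)) if window else 0.0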
Example #24
from picrust.make_cluster_jobs import make_torque_jobs, submit_cluster_jobs
from cogent.util.option_parsing import parse_command_line_parameters, make_option

script_info = {}
script_info[
    'brief_description'] = "Starts multiple jobs in parallel on Torque/qsub based multiprocessor systems."
script_info[
    'script_description'] = "This script is designed to start multiple jobs in parallel on cluster systems with a SGE/qsub based scheduling system."
script_info['script_usage'] = [\
 ("Example",\
 "Start each command listed in test_jobs.txt in parallel. The run id for these jobs will be RUNID. ",\
 "%prog -ms test_jobs.txt RUNID")]
script_info['output_description'] = "No output is created."
script_info['required_options'] = []
script_info['optional_options'] = [\
    make_option('-m','--make_jobs',action='store_true',\
                    help='make the job files [default: %default]'),

    make_option('-s','--submit_jobs',action='store_true',\
                    help='submit the job files [default: %default]'),

    make_option('-d','--delay',action='store',type='int',default=0,
                    help='Number of seconds to pause between launching each job [default: %default]'),

    make_option('-q','--queue',action='store',\
                    type='string',dest='queue', \
                    help='name of queue to submit to '+\
                    ' [default: %default]'),

    make_option('-j','--job_dir', action='store',\
                    type='string',dest='job_dir',\
                    help='directory to store the jobs '+\
Example #25
script_info = {}
script_info["brief_description"] = "Starts multiple jobs in parallel on multicore or multiprocessor systems."
script_info[
    "script_description"
] = "This script is designed to start multiple jobs in parallel on systems with no queueing system, for example a multiple processor or multiple core laptop/desktop machine. This also serves as an example 'cluster_jobs' which users can use a template to define scripts to start parallel jobs in their environment."
script_info["script_usage"] = [
    (
        "Example",
        "Start each command listed in test_jobs.txt in parallel. The run id for these jobs will be RUNID. ",
        "%prog -ms test_jobs.txt RUNID",
    )
]
script_info["output_description"] = "No output is created."
script_info["required_options"] = []
script_info["optional_options"] = [
    make_option("-m", "--make_jobs", action="store_true", help="make the job files [default: %default]"),
    make_option("-s", "--submit_jobs", action="store_true", help="submit the job files [default: %default]"),
    make_option(
        "-n",
        "--num_jobs",
        action="store",
        type="int",
        help="Number of jobs to group commands into. [default: %default]",
        default=4,
    ),
]
script_info["version"] = __version__
script_info["disallow_positional_arguments"] = False


def write_job_files(output_dir, commands, run_id, num_jobs=4):
Example #26
__license__ = "GPL"
__version__ = "1.1.4"
__maintainer__ = "Greg Caporaso"
__email__ = "*****@*****.**"

from cogent.util.option_parsing import parse_command_line_parameters, make_option

script_info = {}
script_info['brief_description'] = ""
script_info['script_description'] = ""
script_info['script_usage'] = [("", "", "")]
script_info['output_description'] = ""
script_info['required_options'] = []
script_info['optional_options'] = [
    make_option('--suppress_unit_tests',
                action='store_true',
                help='suppress unit tests [default: %default]',
                default=False),
    make_option('--suppress_script_usage_tests',
                action='store_true',
                help='suppress script usage tests [default: %default]',
                default=False),
    make_option(
        '--unittest_glob',
        help='wildcard pattern to match tests to run [default: run all]',
        default=None),
    make_option('--script_usage_tests',
                help='comma-separated list of tests to run [default: run all]',
                default=None),
]
script_info['version'] = __version__
script_info['help_on_no_arguments'] = False
Example #27
script_info = {}
script_info['brief_description'] = "Count sequences in one or more fasta files."
script_info['script_description'] = "This script counts the number of sequences in one or more fasta files and prints the results to stdout."
script_info['script_usage'] = [\
 ("Count sequences in one file",
  "Count the sequences in a fasta file and write results to stdout.",
  "%prog -i in.fasta"),
 ("Count sequences in two file",
  "Count the sequences in two fasta files and write results to stdout.",
  "%prog -i in1.fasta,in2.fasta"),
  ("Count the sequences in many fasta files",
   "Count the sequences all .fasta files in current directory and write results to stdout. Note that -i option must be quoted.",
   "%prog -i \"*.fasta\"")]
script_info['output_description']= "Tabular data is written to stdout."
script_info['required_options'] = [
 make_option('-i','--input_fps',
        help='the input filepaths (comma-separated)'),
]
script_info['optional_options'] = [
 make_option('--suppress_errors',action='store_true',\
        help='Suppress warnings about missing files [default: %default]',
        default=False)
]
script_info['version'] = __version__

def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    suppress_errors = opts.suppress_errors

    input_fps = []
    for input_fp in opts.input_fps.split(','):
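        pass  # (loop body truncated in the original example)

# main() is cut off above.  A small sketch of the per-file counting step it
# presumably performs (each FASTA record header begins with '>');
# illustrative only, not the original implementation.
def count_seqs(fasta_fp):
    """Count the number of sequences in a FASTA file."""
    with open(fasta_fp) as f:
        return sum(1 for line in f if line.startswith('>'))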
Example #28
script_info = {}
script_info[
    'brief_description'] = "Generates test datasets for cross-validation studies of PICRUSt's accuracy"
script_info['script_description'] = ""
script_info['script_usage'] = [(
    "",
    "Generate holdout test trees from genome_tree.newick, and save results in the directory ./test_holdout_trees/.",
    "%prog -t genome_tree.newick -o ./test_holdout_trees")]
script_info['output_description'] = ""
method_choices = [
    'exclude_tips_by_distance', 'randomize_tip_labels_by_distance',
    'collapse_tree_by_distance'
]
script_info['required_options'] = [

    make_option('-i','--input_trait_table',type='existing_filepath',\
   help='the input trait table.'),\
 make_option('-t','--input_tree',type='existing_filepath',\
   help='the input tree in Newick format'),\
]
script_info['optional_options'] = [\
  make_option('-o','--output_dir',default='./test_datasets/',type='new_dirpath',\
  help='the output directory.  Duplicate trees, trait tables, expected values and prediction files will be saved here.[default:%default]'),\
  make_option('--min_dist',default=0.0,type='float',\
  help='the minimum phylogenetic distance to use with the holdout method, if applicable.  Usually 0.0.[default:%default]'),\
   make_option('--suppress_tree_modification',default=False,action="store_true",help='If passed, modify only the trait table, not the tree. [default: %default]'),\
  make_option('--dist_increment',default=0.03,type='float',\
  help='the phylogenetic distance increment to use with the holdout method, if applicable.[default:%default]'),\
  make_option('--max_dist',default=0.45,type='float',\
  help='the maximum phylogenetic distance to use with the holdout method, if applicable.[default:%default]'),\
  make_option('--limit_to_tips',default='',type='string',\
  help='if specified, limit test dataset generation to specified tips (comma-separated).[default:%default]'),\
Example #29
from cogent.util.option_parsing import parse_command_line_parameters, make_option
from biom.parse import parse_biom_table
from picrust.predict_metagenomes import predict_metagenomes, calc_nsti
from picrust.util import make_output_dir_for_file,format_biom_table
from os import path
from numpy import around
import gzip

script_info = {}
script_info['brief_description'] = "This script converts metagenomic relative abundance back to sequence counts, by scaling the relative abundnace of each gene in each sample in a biom file by a user-supplied sequencing depth"
script_info['script_description'] = ""
script_info['script_usage'] = [("","Predict metagenomes from genomes.biom and otus.biom.","%prog -i otus.biom -c KEGG_acepic__predict_traits_97.biom.gz -o predicted_metagenomes.biom"),
                               ("","Change output format to plain tab-delimited:","%prog -f -i otus.biom -c KEGG_acepic_predict_traits_97.biom.gz -o predicted_metagenomes.tab")]
script_info['output_description']= "Output is a table of function counts (e.g. KEGG KOs) by sample ids."
script_info['required_options'] = [
 make_option('-s','--input_seq_depth_file',type='existing_filepath',help='an input tab-delimited table, with samples as the first column and an integer sequencing depth as the second'),
 make_option('-i','--input_count_table',type="existing_filepath",help='the input trait counts on per otu basis in biom format (can be gzipped)'),
 make_option('-o','--output_metagenome_table',type="new_filepath",help='the output file for the scaled metagenome')
]
script_info['version'] = __version__

def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    if opts.verbose:
        print "Loading sequencing depth table: ",opts.input_seq_depth_file
    scaling_factors = {}
    for sample_id,depth in parse_seq_count_file(open(opts.input_seq_depth_file,'U')):
        scaling_factors[sample_id]=depth    
    
    ext=path.splitext(opts.input_count_table)[1]
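# main() is truncated above, and parse_seq_count_file is not shown.  A
# plausible sketch of that helper based on the -s option's help text (a
# tab-delimited file of sample id and integer depth); an assumption only.
def parse_seq_count_file(lines):
    """Yield (sample_id, depth) pairs from a tab-delimited depth table."""
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        fields = line.split('\t')
        yield fields[0], int(fields[1])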
Example #30
    'brief_description'] = "Convert a BIOM table to a compatible STAMP profile table."
script_info[
    'script_description'] = "Metadata will be parsed and used as hiearachal data for STAMP."

script_info['script_usage'] = [\
("Minimum Requirments","","%prog table1.biom > table1.spf"),
("OTU table from QIIME","","%prog -m taxonomy otu_table.biom > otu_table.spf"),
("KO file from PICRUSt","","%prog -m KEGG_Description ko.biom > ko.spf"),
("KEGG Pathways table from PICRUSt","","%prog -m KEGG_Pathways ko_L3.biom > ko_L3.spf"),
("Function table from MG-RAST","","%prog -m ontology table1.biom > table1.spf")
                               ]

script_info['output_description'] = "Output is written to STDOUT"

script_info['optional_options'] = [\
    make_option('-m','--metadata',default=None,type="string",help='Name of metadata. [default: %default]')]

script_info['disallow_positional_arguments'] = False

script_info['version'] = __version__


def process_metadata(metadata, metadata_name, obs_id):
    if metadata_name == 'taxonomy':
        fixed_metadata = []
        for idx, val in enumerate(metadata):
            if (re.match(r'[a-z]__$', val)):
                fixed_metadata.append("Unclassified")
            else:
                fixed_metadata.append(val)
        return fixed_metadata
Example #31
from cogent.util.option_parsing import (parse_command_line_parameters, 
                                        make_option)
from format_blast_db_string import format_blast_db_string

script_info = {}
script_info['brief_description'] = "Prints to standard output the path to a \
blast database"
script_info['script_description'] = """This script takes a path and it\
 determines if the path is the base directory of blast database or it is a\
 fasta file with the reference sequences needed to create a blast database\
 on-the-fly"""
script_info['script_usage'] = [("Example:",
"Extract the content of the tgz file named 'in.tgz' into the\
 directory 'out_dir'",
"%prog -i in.tgz -o out_dir")]
script_info['output_description'] = """Prints through standard output the base\
 path of the blast database or the path to the reference sequence file,\
 depending on the input path"""
script_info['required_options'] = [
    make_option('-i', '--input_path', type="existing_path",
                help='Path to check')
]
script_info['optional_options'] = []
script_info['version'] = __version__

if __name__ == '__main__':
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    in_path = opts.input_path

    path = format_blast_db_string(in_path)
    print path
Example #32
script_info = {}
script_info['brief_description'] = "Runs genome evaluations on PICRUSt. "
script_info['script_description'] = "\
Using files created by make_test_datasets.py it runs each test dataset through the ASR (ancestral_state_reconstruction.py) and the genome prediction (predict_traits.py)"

script_info['script_usage'] = [\
("Minimum Requirments","Provide a directory that contains one or more datasets created by make_test_datasets.py and the original reference tree used","%prog -i test_datasets_dir -t reference_tree_fp"),\
("Specify output file","","%prog -i test_datasets_dir -t reference_tree_fp -o output_dir"),\
("Force the launching of jobs that alredy seem done by overwriting existing output files","", "%prog --force -i test_datasets_dir -t reference_tree_fp -o output_dir"),\
]

script_info['output_description']= "Predictions from predict_traits.py for each test dataset."

script_info['required_options'] = [\
make_option('-i','--input_dir',type="existing_dirpath",help='directory containing one or more test datasets'),\
make_option('-t','--ref_tree',type="existing_filepath",help='reference tree that was used with make_test_datasets'),\
]

# Choices for choice options
parallel_method_choices=['sge','torque','multithreaded']
predict_traits_choices =['asr_and_weighting','nearest_neighbor','random_neighbor']
asr_choices = ['ace_ml', 'ace_reml', 'ace_pic', 'wagner']
weighting_choices = ['linear','exponential','equal']

script_info['optional_options'] = [\
make_option('-o','--output_dir',type="new_dirpath",help='the output directory [default: <input_dir>]'),\
make_option('-j','--parallel_method',type='choice',\
            help='Method for parallelization. Valid choices are: '+\
            ', '.join(parallel_method_choices) + ' [default: %default]',\
            choices=parallel_method_choices,default='multithreaded'),\
make_option('-m','--prediction_method',type='choice',\
Example #33
    The exact weight function to use can be specified from the commandline (see options below).

    In general, this approach causes the prediction to be a weighted average of the closest reconstructed ancestor and either the reconstructed or directly observed trait value of the organism of interest's sibling node(s).
"""

METHOD_CHOICES = ['asr_and_weighting','nearest_neighbor','asr_only','weighting_only',\
  'random_neighbor']

WEIGHTING_CHOICES = ['exponential','linear','equal']

script_info['script_usage'] = [("","","")]
script_info['output_description']= "Output is a table (tab-delimited or .biom) of predicted character states"
script_info['required_options'] = [\
make_option('-i','--observed_trait_table',type="existing_filepath",\
  help='the input trait table describing directly observed traits (e.g. sequenced genomes) in tab-delimited format'),\
make_option('-t','--tree',type="existing_filepath",\
  help='the full reference tree, in Newick format')
]
script_info['optional_options'] = [\
 make_option('-o','--output_trait_table',type="new_filepath",\
   default='predicted_states.tsv',help='the output filepath for trait predictions [default: %default]'),\
 make_option('-a','--output_accuracy_metrics',type="new_filepath",\
   default=None,help='if specified, calculate accuracy metrics (i.e. how accurate does PICRUST expect its predictions to be?) and output them to this filepath [default: %default]'),\

 make_option('-m','--prediction_method',default='asr_and_weighting',choices=METHOD_CHOICES,help='Specify prediction method to use.  The recommended prediction method is set as default, so other options are primarily useful for control experiments and methods validation, not typical use.  Valid choices are:'+",".join(METHOD_CHOICES)+'.  "asr_and_weighting"(recommended): use ancestral state reconstructions plus local weighting with known tip nodes.  "nearest_neighbor": predict the closest tip on the tree with trait information.  "random_neighbor": predict a random tip on the tree with trait information. "asr_only": predict the traits of the last reconstructed ancestor, without weighting. "weighting_only": weight all genomes by distance to the organism of interest using the specified weighting function and predict the weighted average.   [default: %default]'),\

 make_option('-w','--weighting_method',default='exponential',choices=WEIGHTING_CHOICES,help='Specify the weighting function to use.  This only applies to prediction methods that incorporate local weighting ("asr_and_weighting" or "weighting_only").  The recommended weighting method is set as default, so other options are primarily useful for control experiments and methods validation, not typical use.  Valid choices are:'+",".join(WEIGHTING_CHOICES)+'.  "exponential"(recommended): weight genomes as a negative exponent of distance.  That is 2^-d, where d is the tip-to-tip distance from the genome to the tip.  "linear": weight tips as a linear function of distance, normalized to the maximum possible distance (max_d - d)/d. "equal": set all weights to a constant (ignoring branch length).   [default: %default]'),\
 
 
 make_option('-l','--limit_predictions_by_otu_table',type="existing_filepath",help='Specify a valid path to a legacy QIIME OTU table to perform predictions only for tips that are listed in the OTU table (regardless of abundance)'),\
 make_option('-g','--limit_predictions_to_organisms',help='Limit predictions to specific, comma-separated organism ids. (Generally only useful for lists of < 10 organism ids, for example when performing leave-one-out cross-validation).'),\
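The -w help text above describes the three weighting schemes only in words. Below is a hedged sketch of what those formulas amount to, with invented names; it illustrates the documented behaviour rather than PICRUSt's actual implementation, and the exact normalization used for 'linear' weighting is an assumption based on the help string.

def weights_from_distances(distances, method='exponential'):
    """Convert tip-to-tip distances into prediction weights."""
    if method == 'exponential':
        # closer genomes dominate: w = 2^-d
        return [2.0 ** -d for d in distances]
    if method == 'linear':
        # linear in distance, here normalized by the largest observed distance
        max_d = max(distances)
        return [(max_d - d) / max_d for d in distances]
    if method == 'equal':
        # branch lengths ignored entirely
        return [1.0 for _ in distances]
    raise ValueError("unknown weighting method: %s" % method)

print weights_from_distances([0.05, 0.2, 0.4], 'exponential')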
Example #34
script_info['script_description'] = ""
script_info['script_usage'] = [
    ("",
     "Partition the predicted contribution to the  metagenomes from each organism in the given OTU table, limited to only K00001, K00002, and K00004.",
     "%prog -i normalized_otus.biom -l K00001,K00002,K00004 -o ko_metagenome_contributions.tab"
     ),
    ("",
     "Partition the predicted contribution to the  metagenomes from each organism in the given OTU table, limited to only COG0001 and COG0002.",
     "%prog -i normalized_otus.biom -l COG0001,COG0002 -t cog -o cog_metagenome_contributions.tab"
     )
]
script_info[
    'output_description'] = "Output is a tab-delimited column indicating OTU contribution to each function."
script_info['required_options'] = [
    make_option('-i',
                '--input_otu_table',
                type='existing_filepath',
                help='the input otu table in biom format'),
    make_option('-o',
                '--output_fp',
                type="new_filepath",
                help='the output file for the metagenome contributions')
]
type_of_prediction_choices = ['ko', 'cog', 'rfam']
gg_version_choices = ['13_5', '18may2012']
script_info['optional_options'] = [\
    make_option('-t','--type_of_prediction',default=type_of_prediction_choices[0],type="choice",\
                    choices=type_of_prediction_choices,\
                    help='Type of functional predictions. Valid choices are: '+\
                    ', '.join(type_of_prediction_choices)+\
                    ' [default: %default]'),
    make_option('-g','--gg_version',default=gg_version_choices[0],type="choice",\
__license__ = "GPL"
__version__ = "0.9.1-dev"
__maintainer__ = "Daniel McDonald"
__email__ = "*****@*****.**"
__status__ = "Development"

from cogent.util.option_parsing import parse_command_line_parameters, make_option
from biom.parse import parse_biom_table

script_info = {}
script_info['brief_description'] = "Collapse table data to a specified level in a hierarchy."
script_info['script_description'] = "This script collapses hierarchical data to a specified level. For instance, often it is useful to examine KEGG results from a higher level within the pathway hierarchy. Many genes are sometimes involved in multiple pathways, and in these circumstances (also know as a one-to-many relationship), the gene is counted for each pathway. This has a side effect of increasing the total count of genes in the table."
script_info['script_usage'] = [("","Collapse predicted metagenome results.","""%prog -i metagenome.biom -c "KEGG Pathways" -l 3 -o metagenome_at_level3.biom""")]
script_info['output_description']= "Output table is contains gene counts at a higher level within a hierarchy."
script_info['required_options'] = [\
 make_option('-i','--input_fp',type="existing_filepath",help='the predicted metagenome table'),\
 make_option('-o','--output_fp',type='new_filepath', help='the resulting table'),
 make_option('-c','--metadata_category',type='string',help='the metadata category that describes the hierarchy'),
 make_option('-l','--level',type='int',help='the level in the hierarchy to collapse to. A value of 0 is not allowed, a value of 1 is the highest level, and any higher value nears the leaves of the hierarchy. For instance, if the hierarchy contains 4 levels, specifying 3 would collapse at one level above being fully specified.')
]
script_info['optional_options'] = [
 make_option('--ignore',type='string',default=None, help="Ignore the comma-separated list of names. For instance, specifying --ignore unknown,unclassified will ignore those labels while collapsing. The default is to not ignore anything. [default: %default]"),
 make_option('-f','--format_tab_delimited',action="store_true",default=False,help='output the predicted metagenome table in tab-delimited format [default: %default]')]
script_info['version'] = __version__

def make_collapse_f(category, level, ignore):
    """produce a collapsing function for one-to-many relationships"""
    # adjust level such that, for instance, level 1 corresponds to index 0
    if level > 0:
        level -= 1
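The script description above points out that one-to-many gene-to-pathway mappings inflate the collapsed totals. A small sketch with plain dicts and invented names (not the categorize_by_function implementation) makes the effect concrete:

def collapse_counts(gene_counts, gene_to_pathways):
    """Collapse gene counts to pathway counts (one-to-many mapping).

    A gene mapped to several pathways contributes its full count to each
    of them, so the collapsed total can exceed the original total.
    """
    pathway_counts = {}
    for gene, count in gene_counts.items():
        for pathway in gene_to_pathways.get(gene, []):
            pathway_counts[pathway] = pathway_counts.get(pathway, 0) + count
    return pathway_counts

print collapse_counts({'K00001': 10, 'K00002': 5},
                      {'K00001': ['Glycolysis', 'Fatty acid metabolism'],
                       'K00002': ['Glycolysis']})
# Glycolysis: 15, Fatty acid metabolism: 10 -- total 25 vs. an original total of 15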
Example #36
script_info = {}
script_info['brief_description'] = "Convert a BIOM table to a compatible STAMP profile table."
script_info['script_description'] = "Metadata will be parsed and used as hiearachal data for STAMP."

script_info['script_usage'] = [\
("Minimum Requirments","","%prog table1.biom > table1.spf"),
("OTU table from QIIME","","%prog -m taxonomy otu_table.biom > otu_table.spf"),
("KO file from PICRUSt","","%prog -m KEGG_Description ko.biom > ko.spf"),
("KEGG Pathways table from PICRUSt","","%prog -m KEGG_Pathways ko_L3.biom > ko_L3.spf"),
("Function table from MG-RAST","","%prog -m ontology table1.biom > table1.spf")
]

script_info['output_description']= "Output is written to STDOUT"

script_info['optional_options'] = [\
    make_option('-m','--metadata',default=None,type="string",help='Name of metadata. [default: %default]')]


script_info['disallow_positional_arguments'] = False

script_info['version'] = __version__
       
def process_metadata(metadata,metadata_name,obs_id):
    if metadata_name =='taxonomy':
        fixed_metadata=[]
        for idx,val in enumerate(metadata):
            if(re.match(r'[a-z]__$',val)):
                fixed_metadata.append("Unclassified")
            else:
                fixed_metadata.append(val)
        return fixed_metadata
Example #37
script_info = {}
script_info['brief_description'] = "This script produces the actual metagenome functional predictions for a given OTU table."
script_info['script_description'] = ""
script_info['script_usage'] = [("","Predict KO abundances for a given OTU table picked against the newest version of GreenGenes.",\
                                    "%prog -i normalized_otus.biom -o predicted_metagenomes.biom"),
                               ("","Change output format to plain tab-delimited:","%prog -f -i normalized_otus.biom -o predicted_metagenomes.txt"),
                               ("","Predict COG abundances for a given OTU table.","%prog -i normalized_otus.biom -t cog -o cog_predicted_metagenomes.biom"),\
                               ("","Predict RFAM abundances for a given OTU table.","%prog -i normalized_otus.biom -t rfam -o rfam_predicted_metagenomes.biom"),\
                               ("","Output confidence intervals for each prediction.","%prog -i normalized_otus.biom -o predicted_metagenomes.biom --with_confidence"),\
                               ("","Predict metagenomes using a custom trait table in tab-delimited format.","%prog -i otu_table_for_custom_trait_table.biom -c custom_trait_table.tab -o output_metagenome_from_custom_trait_table.biom"),\
                               ("","Predict metagenomes,variances,and 95% confidence intervals for each gene category using a custom trait table in tab-delimited format.","%prog -i otu_table_for_custom_trait_table.biom --input_variance_table custom_trait_table_variances.tab -c custom_trait_table.tab -o output_metagenome_from_custom_trait_table.biom --with_confidence"),\
                                   ("","Change the version of GG used to pick OTUs","%prog -i normalized_otus.biom -g 18may2012 -o predicted_metagenomes.biom")]
script_info['output_description']= "Output is a table of function counts (e.g. KEGG KOs) by sample ids."
script_info['required_options'] = [
 make_option('-i','--input_otu_table',type='existing_filepath',help='the input otu table in biom format'),
 make_option('-o','--output_metagenome_table',type="new_filepath",help='the output file for the predicted metagenome')
]
type_of_prediction_choices=['ko','cog','rfam']
gg_version_choices=['13_5','18may2012']
script_info['optional_options'] = [\
    make_option('-t','--type_of_prediction',default=type_of_prediction_choices[0],type="choice",\
                    choices=type_of_prediction_choices,\
                    help='Type of functional predictions. Valid choices are: '+\
                    ', '.join(type_of_prediction_choices)+\
                    ' [default: %default]'),
    make_option('-g','--gg_version',default=gg_version_choices[0],type="choice",\
                    choices=gg_version_choices,\
                    help='Version of GreenGenes that was used for OTU picking. Valid choices are: '+\
                    ', '.join(gg_version_choices)+\
                    ' [default: %default]'),
Example #38
("Import from list of files","provide comma-separated list of files","%prog -i in1.txt,in2.txt"),
("Import from all files with regex","provide regex expression to filter files","%prog -i \"*.txt\"")]

script_info['output_description'] = "OPTIONAL"

### REQUIRED OPTIONS ###
script_info['required_options'] = [
    # INPUT [-i] FILEPATH
    # make_option('-i','--input_fps',help='the input filepaths'),
]

### OPTIONAL OPTIONS ###
script_info['optional_options'] = [

    # OUTPUT [-o] FILEPATH #
    make_option('-o', '--output_fp', help='Output Directory'),

    # INPUT [-i] FILEPATH(S) #
    make_option('-i', '--input_fps', help='Input Filepaths'),

    # OVERWRITE OUTPUT [-o] #
    make_option('-f',
                '--overwrite',
                action='store_true',
                help='Overwrite the Output Directory [default: %default]',
                default=False),
]

script_info['version'] = __version__

Example #39
  set_label_conversion_fns, fix_tree_labels, convert_trait_table_entries
from numpy import array
from picrust.make_test_datasets import yield_test_trees,\
  make_distance_based_tip_label_randomizer,make_distance_based_exclusion_fn,\
  exclude_tip,write_tree, yield_genome_test_data_by_distance


script_info = {}
script_info['brief_description'] = "Generates test datasets for cross-validation studies of PICRUSt's accuracy"
script_info['script_description'] = ""
script_info['script_usage'] = [("","Generate holdout test trees from genome_tree.newick, and save results in the directory ./test_holdout_trees/.","%prog -t genome_tree.newick -o ./test_holdout_trees")]
script_info['output_description']= ""
method_choices = ['exclude_tips_by_distance','randomize_tip_labels_by_distance','collapse_tree_by_distance']
script_info['required_options'] = [
 
 make_option('-i','--input_trait_table',type='existing_filepath',\
   help='the input trait table.'),\
 make_option('-t','--input_tree',type='existing_filepath',\
   help='the input tree in Newick format'),\
]
script_info['optional_options'] = [\
  make_option('-o','--output_dir',default='./test_datasets/',type='new_dirpath',\
  help='the output directory.  Duplicate trees, trait tables, expected values and prediction files will be saved here.[default:%default]'),\
  make_option('--min_dist',default=0.0,type='float',\
  help='the minimum phylogenetic distance to use with the holdout method, if applicable.  Usually 0.0.[default:%default]'),\
   make_option('--suppress_tree_modification',default=False,action="store_true",help='If passed, modify only the trait table, not the tree . [default: %default]'),\
  make_option('--dist_increment',default=0.03,type='float',\
  help='the phylogenetic distance increment to use with the holdout method, if applicable.[default:%default]'),\
  make_option('--max_dist',default=0.45,type='float',\
  help='the maximum phylogenetic distance to use with the holdout method, if applicable.[default:%default]'),\
  make_option('--limit_to_tips',default='',type='string',\
  help='if specified, limit test dataset generation to specified tips (comma-separated).[default:%default]'),\
Example #40
#from qiime.util import load_qiime_config

#qiime_config = load_qiime_config()


script_info = {}
script_info['brief_description'] = "Starts multiple jobs in parallel on multicore or multiprocessor systems."
script_info['script_description'] = "This script is designed to start multiple jobs in parallel on systems with no queueing system, for example a multiple processor or multiple core laptop/desktop machine. This also serves as an example 'cluster_jobs' which users can use a template to define scripts to start parallel jobs in their environment."
script_info['script_usage'] = [\
 ("Example",\
 "Start each command listed in test_jobs.txt in parallel. The run id for these jobs will be RUNID. ",\
 "%prog -ms test_jobs.txt RUNID")]
script_info['output_description']= "No output is created."
script_info['required_options'] = []
script_info['optional_options'] = [\
 make_option('-m','--make_jobs',action='store_true',\
         help='make the job files [default: %default]'),\
 make_option('-s','--submit_jobs',action='store_true',\
         help='submit the job files [default: %default]'),\
 make_option('-d','--delay',action='store',type='int',default=0,
             help='Number of seconds to pause between launching each job [default: %default]'),
 make_option('-n','--num_jobs',action='store',type='int',\
             help='Number of jobs to group commands into. [default: %default]',\
                default=4)\
]
script_info['version'] = __version__
script_info['disallow_positional_arguments'] = False

def write_job_files(output_dir,commands,run_id,num_jobs=4):
    jobs_dir = '%s/jobs/' % output_dir
    job_fps = []
    if not exists(jobs_dir):
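write_job_files is cut off above; per the -n/--num_jobs help text, the intent is to split the command list into num_jobs job files under <output_dir>/jobs/. A minimal sketch of that grouping (the file naming and round-robin split are assumptions, not the original function body):

from os import makedirs
from os.path import exists, join

def write_job_files_sketch(output_dir, commands, run_id, num_jobs=4):
    jobs_dir = join(output_dir, 'jobs')
    if not exists(jobs_dir):
        makedirs(jobs_dir)
    # round-robin the commands into at most num_jobs groups
    groups = [commands[i::num_jobs] for i in range(num_jobs)]
    job_fps = []
    for n, group in enumerate(groups):
        if not group:
            continue
        job_fp = join(jobs_dir, '%s_%d.txt' % (run_id, n))
        open(job_fp, 'w').write('\n'.join(group) + '\n')
        job_fps.append(job_fp)
    return job_fps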
Example #41
script_info = {}
script_info['brief_description'] = "Runs genome evaluations on PICRUSt. "
script_info['script_description'] = "\
Using files created by make_test_datasets.py it runs each test dataset through the ASR (ancestral_state_reconstruction.py) and the genome prediction (predict_traits.py)"

script_info['script_usage'] = [\
("Minimum Requirments","Provide a directory that contains one or more datasets created by make_test_datasets.py and the original reference tree used","%prog -i test_datasets_dir -t reference_tree_fp"),\
("Specify output file","","%prog -i test_datasets_dir -t reference_tree_fp -o output_dir"),\
("Force the launching of jobs that alredy seem done by overwriting existing output files","", "%prog --force -i test_datasets_dir -t reference_tree_fp -o output_dir"),\
]

script_info['output_description']= "Predictions from predict_traits.py for each test dataset."

script_info['required_options'] = [\
make_option('-i','--input_dir',type="existing_dirpath",help='directory containing one or more test datasets'),\
make_option('-t','--ref_tree',type="existing_filepath",help='reference tree that was used with make_test_datasets'),\
]

# Choices for choice options
parallel_method_choices=['sge','torque','multithreaded']
predict_traits_choices =['asr_and_weighting','nearest_neighbor','random_neighbor']
asr_choices = ['ace_ml', 'ace_reml', 'ace_pic', 'wagner']
weighting_choices = ['linear','exponential','equal']

script_info['optional_options'] = [\
make_option('-o','--output_dir',type="new_dirpath",help='the output directory [default: <input_dir>]'),\
make_option('-j','--parallel_method',type='choice',\
            help='Method for parallelization. Valid choices are: '+\
            ', '.join(parallel_method_choices) + ' [default: %default]',\
            choices=parallel_method_choices,default='multithreaded'),\
make_option('-m','--prediction_method',type='choice',\
Example #42
  make_option
from picrust.evaluate_test_datasets import calculate_accuracy_stats_from_observations
from biom import load_table, Table
from picrust.util import make_output_dir_for_file
from random import shuffle

script_info = {}
script_info['brief_description'] = "Compare the accuracy of biom files (expected and observed) either by observations (default) or by samples."
script_info['script_description'] =\
    """ """
script_info['script_usage'] = [\
    ("Example 1","Compare an observed table to an expected table using relative abundance","%prog -e expected_ra.biom -o compare_results_ra.tab observed_ra.biom"),
    ("Example 2","Compare an observed table to an expected table using real counts","%prog --not_relative_abundance -e expected.biom -o compare_results.tab observed.biom")]
script_info['output_description']= "Outputs will be tab delimited file with various accuracy metrics."
script_info['required_options'] = [
 make_option('-e','--exp_trait_table_fp',type="existing_filepath",help='the expected trait table (biom format)'),\
 make_option('-o','--output_fp',type="new_filepath",help='the output file'),
]
script_info['optional_options'] = [
  make_option('-c','--compare_observations',action="store_true",default=False,help='Calculate accuracy values by comparing between observations (instead of between samples) [default: %default]'),\
  make_option('-n','--normalize',action="store_true",default=False,help='Convert both expected and observed tables to relative abundances (instead of observations) [default: %default]'),
  make_option('-l','--limit_to_expected_observations',action="store_true",default=False,help='Ignore observations that are not in the expected table[default: %default]'),
  make_option('--limit_to_observed_observations',action="store_true",default=False,help='Ignore observations that are not in the observed table[default: %default]'),
  make_option('-s','--shuffle_samples',action="store_true",default=False,help='Shuffle samples ids randomly before measuring accuracy[default: %default]'),
  make_option('--not_relative_abundance_scores',action="store_true",default=False,help='Round numbers (instead of taking ceil() which is used for RA) before calculating TP,FP,FN,TN [default: %default]')

        ]
script_info['disallow_positional_arguments'] = False
script_info['version'] = __version__
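The --not_relative_abundance_scores flag above implies how scoring works: values are reduced to presence/absence (ceil() for relative abundances, rounding for raw counts) and tallied as TP/FP/FN/TN against the expected table. A rough sketch of that bookkeeping on parallel lists (an illustration only, not the calculate_accuracy_stats_from_observations implementation):

from math import ceil

def confusion_counts(expected, observed, relative_abundance=True):
    """Count TP, FP, FN, TN for one sample's expected vs. observed values."""
    if relative_abundance:
        to_presence = lambda x: int(ceil(x) > 0)
    else:
        to_presence = lambda x: int(round(x) > 0)
    tp = fp = fn = tn = 0
    for e, o in zip(expected, observed):
        e, o = to_presence(e), to_presence(o)
        if e and o:
            tp += 1
        elif o and not e:
            fp += 1
        elif e and not o:
            fn += 1
        else:
            tn += 1
    return tp, fp, fn, tn

print confusion_counts([0.2, 0.0, 0.1], [0.3, 0.05, 0.0])
# (1, 1, 1, 0)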

Example #43
__license__ = "GPL"
__version__ = "0.9.2-dev"
__maintainer__ = "Greg Caporaso"
__email__ = "*****@*****.**"

from cogent.util.option_parsing import parse_command_line_parameters, make_option

script_info = {}
script_info['brief_description'] = ""
script_info['script_description'] = ""
script_info['script_usage'] = [("","","")]
script_info['output_description']= ""
script_info['required_options'] = []
script_info['optional_options'] = [
 make_option('--suppress_unit_tests',
             action='store_true',
             help='suppress unit tests [default: %default]',
             default=False),
 make_option('--suppress_script_usage_tests',
             action='store_true',
             help='suppress script usage tests [default: %default]',
             default=False),
 make_option('--unittest_glob',
             help='wildcard pattern to match tests to run [default: run all]',
             default=None),
 make_option('--script_usage_tests',
             help='comma-separated list of tests to run [default: run all]',
             default=None),
]
script_info['version'] = __version__
script_info['help_on_no_arguments'] = False
Example #44
from picrust.format_tree_and_trait_table import load_picrust_tree
from picrust.parallel import submit_jobs, system_call, wait_for_output_files, grouper
from os import makedirs, remove, popen
from os.path import join, splitext
from cogent.app.util import get_tmp_filename
import gzip

script_info = {}
script_info['brief_description'] = "Runs predict_traits.py in parallel"
script_info['script_description'] = ""
script_info['script_usage'] = [\
("","Basic","%prog -i trait_table.tab -t reference_tree.newick -r asr_counts.tab -c asr_ci.tab -o predict_traits.tab")]
script_info['output_description'] = ""

script_info['required_options'] = [\
    make_option('-i','--observed_trait_table',type="existing_filepath",\
                    help='the input trait table describing directly observed traits (e.g. sequenced genomes) in tab-delimited format'),
    make_option('-t','--tree',type="existing_filepath",\
                    help='the full reference tree, in Newick format'),
    make_option('-o','--output_trait_table',type="new_filepath",\
                    help='the output filepath for trait predictions'),\
]

parallel_method_choices = ['sge', 'torque', 'multithreaded']

script_info['optional_options'] = [\
    make_option('-a','--calculate_accuracy_metrics',default=False,action="store_true",\
                    help='if specified, calculate accuracy metrics (i.e. how accurate does PICRUSt expect its predictions to be?) and add to output file [default: %default]'),
    make_option('-r','--reconstructed_trait_table',
                type="existing_filepath",default=None,
                help='the input trait table describing reconstructed traits (from ancestral_state_reconstruction.py) in tab-delimited format [default: %default]'),
Example #45
__email__ = "*****@*****.**"
__status__ = "Development"

from cogent.util.option_parsing import parse_command_line_parameters, make_option
from shutil import copyfile
from tgz_manager import extract_from_tgz, ERROR_MSG

script_info = {}
script_info['brief_description'] = "Extract the content of a tgz file."
script_info['script_description'] = """If input_tgz has one file: extract it an rename it as output_path.
If input_tgz has multiple files: extract them in a directory named output_path.
If input_tgz is not a tgz file (must be a file, not a directory): rename the input file as output_path"""
script_info['script_usage'] = [("Example:", "Extract the content of the tgz file named 'in.tgz' into the directory 'out_dir'", "%prog -i in.tgz -o out_dir")]
script_info['output_description'] = ""
script_info['required_options'] = [
	make_option('-i', '--input_tgz', type="existing_filepath",
				help='File path for the tgz file to uncompress'),
	make_option('-o', '--output_path', type="new_path",
				help='Path where to extract the contents of the tgz file')
]
script_info['optional_options'] = []
script_info['version'] = __version__

def extract_if_is_tgz(tgz_fp, output_path):
	try:
		extract_from_tgz(tgz_fp, output_path)
	except ValueError, e:
		# The input was not a tgz file; fall back to copying it to output_path
		if str(e) == ERROR_MSG:
			copyfile(tgz_fp, output_path)
		else:
			raise ValueError, e
Example #46
#from qiime.util import load_qiime_config

#qiime_config = load_qiime_config()


script_info = {}
script_info['brief_description'] = "Starts multiple jobs in parallel on multicore or multiprocessor systems."
script_info['script_description'] = "This script is designed to start multiple jobs in parallel on systems with no queueing system, for example a multiple processor or multiple core laptop/desktop machine. This also serves as an example 'cluster_jobs' which users can use a template to define scripts to start parallel jobs in their environment."
script_info['script_usage'] = [\
 ("Example",\
 "Start each command listed in test_jobs.txt in parallel. The run id for these jobs will be RUNID. ",\
 "%prog -ms test_jobs.txt RUNID")]
script_info['output_description']= "No output is created."
script_info['required_options'] = []
script_info['optional_options'] = [\
 make_option('-m','--make_jobs',action='store_true',\
         help='make the job files [default: %default]'),\
 make_option('-s','--submit_jobs',action='store_true',\
         help='submit the job files [default: %default]'),\
 make_option('-n','--num_jobs',action='store',type='int',\
             help='Number of jobs to group commands into. [default: %default]',\
                default=4)\
]
script_info['version'] = __version__
script_info['disallow_positional_arguments'] = False

def write_job_files(output_dir,commands,run_id,num_jobs=4):
    jobs_dir = '%s/jobs/' % output_dir
    job_fps = []
    if not exists(jobs_dir):
        try:
            makedirs(jobs_dir)
Example #47
script_info['script_description'] =\
  """Reformats scripts and trait tables.  Optional fixes include:
        -- Add short (epsilon) branch lengths in place of 0 length branches
        -- Filter out taxa that don't match between tree and trait table
        -- Output tree in NEXUS format
        -- Ensure tree is bifurcating (remove polytomies using very short branches)
        -- Convert floating point trait values to integers
        -- Add a short branch length to the root branch (required by BayesTraits)
        -- Remove internal node names (required by BayesTraits)
        """

script_info['script_usage'] = [\
    ("Example 1","Reformat a tree and trait table with default options:","%prog -i traits.tab -t tree.nwk -o ./format_output/")]
script_info['output_description']= "Outputs a reformatted tree and trait table."
script_info['required_options'] = [\
          make_option('-t','--input_tree',type="existing_filepath",help='the input tree (Newick format)'),\
          make_option('-i','--input_trait_table',type="existing_filepath",help='the input trait table (QIIME OTU table format)')
                  ]

delimiter_choices = ['tab','space','comma']
script_info['optional_options'] = [\
          make_option('-m','--tree_to_trait_mapping',default=None,type="existing_filepath",help='a two-column, tab-delimited text file mapping identifiers in the tree (column 1) to identifiers in the trait table (column 2). If supplied, the identifiers in the trait table will be converted to match the identifiers in the tree. (This mapping does not need to be supplied if the tree and trait table already use a common set of identifiers.) [default: %default]'),\
          make_option('-o','--output_dir',default='./formatted/',type="new_filepath",help='the output directory [default: %default]'),\
          make_option('--input_table_delimiter',default='tab',type="choice",choices=delimiter_choices,\
            help='The character delimiting fields in the input trait table. Valid choices are:'+','.join(delimiter_choices)+' [default: %default]'),\
          make_option('--output_table_delimiter',default='tab',type="choice",choices=delimiter_choices,\
            help='The character delimiting fields in the output trait table. Valid choices are:'+','.join(delimiter_choices)+' [default: %default]'),\
          make_option('--suppress_bifurcating',default=False,action="store_true",help="If set, don't ensure that tree is fully bifurcating. [default: %default]"),\
          make_option('-n','--convert_to_nexus',default=False,action="store_true",help='Convert tree to NEXUS format, including a translate block mapping tip names to numbers. [default: %default]'),\
          make_option('-c','--convert_values_to_ints',default=False,action="store_true",help='Convert the values for each character state to integers. [default: %default]'),\
          make_option('--no_minimum_branch_length',default=False,action="store_true",help="If set, don't ensure all branches have at least a small but non-zero branchlength. [default: %default]"),\
script_info['script_description'] = ""
script_info['script_usage'] = [
                               ("","Predict KO abundances for a given OTU table picked against the newest version of GreenGenes.", "%prog -i normalized_otus.biom -o predicted_metagenomes.biom"),
                               ("","Change output format to plain tab-delimited:","%prog -f -i normalized_otus.biom -o predicted_metagenomes.txt"),
                               ("","Predict COG abundances for a given OTU table.","%prog -i normalized_otus.biom -t cog -o cog_predicted_metagenomes.biom"),\
                               ("","Predict RFAM abundances for a given OTU table.","%prog -i normalized_otus.biom -t rfam -o rfam_predicted_metagenomes.biom"),\
                               ("","Output confidence intervals for each prediction.","%prog -i normalized_otus.biom -o predicted_metagenomes.biom --with_confidence"),\
                               ("","Predict metagenomes using a custom trait table in tab-delimited format.","%prog -i otu_table_for_custom_trait_table.biom -c custom_trait_table.tab -o output_metagenome_from_custom_trait_table.biom"),\
                               ("","Predict metagenomes,variances,and 95% confidence intervals for each gene category using a custom trait table in tab-delimited format.","%prog -i otu_table_for_custom_trait_table.biom --input_variance_table custom_trait_table_variances.tab -c custom_trait_table.tab -o output_metagenome_from_custom_trait_table.biom --with_confidence"),\
                               ("","Change the version of GG used to pick OTUs","%prog -i normalized_otus.biom -g 18may2012 -o predicted_metagenomes.biom")
                                ]
script_info[
    'output_description'] = "Output is a table of function counts (e.g. KEGG KOs) by sample ids."
script_info['required_options'] = [
    make_option('-i',
                '--input_otu_table',
                type='existing_filepath',
                help='the input otu table in biom format'),
    make_option('-o',
                '--output_metagenome_table',
                type="new_filepath",
                help='the output file for the predicted metagenome')
]
type_of_prediction_choices = ['ko', 'cog', 'rfam']
gg_version_choices = ['13_5', '18may2012']
script_info['optional_options'] = [\
    make_option('-t','--type_of_prediction',default=type_of_prediction_choices[0],type="choice",\
                    choices=type_of_prediction_choices,\
                    help='Type of functional predictions. Valid choices are: '+\
                    ', '.join(type_of_prediction_choices)+\
                    ' [default: %default]'),
    make_option('-g','--gg_version',default=gg_version_choices[0],type="choice",\
Example #49
from cogent.util.option_parsing import parse_command_line_parameters, make_option
from picrust.count import wagner_for_picrust
from picrust.ace import ace_for_picrust
from picrust.ancestral_state_reconstruction import run_asr_in_parallel
from picrust.util import make_output_dir_for_file,make_output_dir

script_info = {}
script_info['brief_description'] = "Runs ancestral state reconstruction given a tree and trait table"
script_info['script_description'] = "\
Provides a common interface for running various ancestral state reconstruction methods (e.g. ACE, BayesTraits, etc.)."
script_info['script_usage'] = [\
("Example 1","Provide a tree file and trait table file:","%prog -i trait_table.tab -t pruned_tree.newick -o asr_counts.tab -c asr_ci.tab")]
script_info['output_description']= "A table containing trait information for internal nodes of the tree."

script_info['required_options'] = [\
make_option('-t','--input_tree_fp',type="existing_filepath",help='the tree to use for ASR'),\
make_option('-i','--input_trait_table_fp',type="existing_filepath",help='the trait table to use for ASR'),\
]
asr_method_choices=['ace_ml','ace_reml','ace_pic','wagner']
parallel_method_choices=['sge','torque','multithreaded']

script_info['optional_options'] = [\
make_option('-m','--asr_method',type='choice',
                help='Method for ancestral state reconstruction. Valid choices are: '+\
                ', '.join(asr_method_choices) + ' [default: %default]',\
                choices=asr_method_choices,default='ace_pic'),\
make_option('-o','--output_fp',type="new_filepath",help='output trait table [default:%default]',default='asr_counts.tab'),\
make_option('-c','--output_ci_fp',type="new_filepath",help='output table containing 95% confidence intervals, loglik, and brownian motion parameters for each asr prediction [default:%default]',default='asr_ci.tab'),\
make_option('-p','--parallel',action="store_true",help='allow parallelization of asr',default=False),\
make_option('-j','--parallel_method',type='choice',
                help='Method for parallelization. Valid choices are: '+\
                ', '.join(parallel_method_choices) + ' [default: %default]',\
Example #50
from picrust.format_tree_and_trait_table import load_picrust_tree
from picrust.parallel import submit_jobs, system_call,wait_for_output_files,grouper
from os import makedirs, remove, popen
from os.path import join,splitext
from cogent.app.util import get_tmp_filename
import gzip

script_info = {}
script_info['brief_description'] = "Runs predict_traits.py in parallel"
script_info['script_description'] = ""
script_info['script_usage'] = [\
("","Basic","%prog -i trait_table.tab -t reference_tree.newick -r asr_counts.tab -c asr_ci.tab -o predict_traits.tab")]
script_info['output_description']= ""

script_info['required_options'] = [\
    make_option('-i','--observed_trait_table',type="existing_filepath",\
                    help='the input trait table describing directly observed traits (e.g. sequenced genomes) in tab-delimited format'),
    make_option('-t','--tree',type="existing_filepath",\
                    help='the full reference tree, in Newick format'),
    make_option('-o','--output_trait_table',type="new_filepath",\
                    help='the output filepath for trait predictions'),\
]

parallel_method_choices=['sge','torque','multithreaded']

script_info['optional_options'] = [\
    make_option('-a','--calculate_accuracy_metrics',default=False,action="store_true",\
                    help='if specified, calculate accuracy metrics (i.e. how accurate does PICRUSt expect its predictions to be?) and add to output file [default: %default]'),
    make_option('-r','--reconstructed_trait_table',
                type="existing_filepath",default=None,
                help='the input trait table describing reconstructed traits (from ancestral_state_reconstruction.py) in tab-delimited format [default: %default]'),
Example #51
try:
    from cogent.util.option_parsing import parse_command_line_parameters, \
            make_option
    cogent_cl_parsing = True
except ImportError:
    from sys import argv
    cogent_cl_parsing = False

if cogent_cl_parsing:
    script_info = {}
    script_info['brief_description'] = "Subset a BIOM file."
    script_info['script_description'] = "Subset a BIOM file, over either the observations or samples, without fully parsing it. This script is intended to assist working with very large tables when tight on memory, or as a light weight way to subset a full table. Currently, it is possible to produce tables with rows or columns (observations or samples) that are fully zerod."
    script_info['script_usage'] = [("","Subset the observations in my_data.biom file.","%prog -i my_data.biom -a observations -s file_with_ids")]
    script_info['output_description']= ""
    script_info['required_options'] = [
     make_option('-i','--biom_fp',type="existing_filepath",
                 help='the BIological Observation Matrix filepath'),
     make_option('-a','--axis', type='choice',
                  choices=['observations','samples'],
                  help="The axis to subset over"),
     make_option('-s','--ids_fp',type="existing_filepath",
                 help="A file containing a single column of IDs to retain"),
     make_option('-o','--output_fp',type="new_filepath",
                 help="A file to write the result to")
    ]
    script_info['version'] = __version__
else:
    from optparse import OptionParser, make_option
    options = [
     make_option('-i','--biom_fp',type="string",
                 help='the BIological Observation Matrix filepath'),
     make_option('-a','--axis', type='string',
Example #52
from picrust.metagenome_contributions import partition_metagenome_contributions
from picrust.util import make_output_dir_for_file, get_picrust_project_dir, convert_precalc_to_biom
from os import path
from os.path import join
import gzip

script_info = {}
script_info['brief_description'] = "This script partitions metagenome functional contributions according to function, OTU, and sample, for a given OTU table."
script_info['script_description'] = ""
script_info['script_usage'] = [
("","Partition the predicted contribution to the  metagenomes from each organism in the given OTU table, limited to only K00001, K00002, and K00004.","%prog -i normalized_otus.biom -l K00001,K00002,K00004 -o ko_metagenome_contributions.tab"),
("","Partition the predicted contribution to the  metagenomes from each organism in the given OTU table, limited to only COG0001 and COG0002.","%prog -i normalized_otus.biom -l COG0001,COG0002 -t cog -o cog_metagenome_contributions.tab")
]
script_info['output_description']= "Output is a tab-delimited column indicating OTU contribution to each function."
script_info['required_options'] = [
 make_option('-i','--input_otu_table',type='existing_filepath',help='the input otu table in biom format'),
 make_option('-o','--output_fp',type="new_filepath",help='the output file for the metagenome contributions')
]
type_of_prediction_choices=['ko','cog','rfam']
gg_version_choices=['13_5','18may2012']
script_info['optional_options'] = [\
    make_option('-t','--type_of_prediction',default=type_of_prediction_choices[0],type="choice",\
                    choices=type_of_prediction_choices,\
                    help='Type of functional predictions. Valid choices are: '+\
                    ', '.join(type_of_prediction_choices)+\
                    ' [default: %default]'),
    make_option('-g','--gg_version',default=gg_version_choices[0],type="choice",\
                    choices=gg_version_choices,\
                    help='Version of GreenGenes that was used for OTU picking. Valid choices are: '+\
                    ', '.join(gg_version_choices)+\
                    ' [default: %default]'),
Example #53
from cogent.util.option_parsing import parse_command_line_parameters,\
  make_option
from picrust.evaluate_test_datasets import unzip,evaluate_test_dataset,\
 update_pooled_data, run_accuracy_calculations_on_biom_table,run_accuracy_calculations_on_pooled_data,\
 format_scatter_data, format_correlation_data, run_and_format_roc_analysis

from biom.parse import parse_biom_table, convert_biom_to_table

script_info = {}
script_info['brief_description'] = "Pool character predictions within a directory, given directories of expected vs. observed test results"
script_info['script_description'] =\
    """The script finds all paired expected and observed values in a set of directories and generates pooled .biom files in a specified output directory"""
script_info['script_usage'] = [("","Pool .biom files according to holdout_distance.","%prog -i obs_otu_table_dir -e exp_otu_table_dir -p distance -o./evaluation_results/pooled_by_distance/")]
script_info['output_description']= "Outputs will be obs,exp data points for the comparison"
script_info['required_options'] = [
 make_option('-i','--trait_table_dir',type="existing_dirpath",help='the input trait table directory (files in biom format)'),\
 make_option('-e','--exp_trait_table_dir',type="existing_dirpath",help='the input expected trait table directory (files in biom format)'),\
 make_option('-o','--output_dir',type="new_dirpath",help='the output directory'),
]
script_info['optional_options'] = [
        make_option('-f','--field_order',\
                default='file_type,prediction_method,weighting_method,holdout_method,distance,organism',help='pass comma-separated categories, in the order they appear in file names.   Categories are "file_type","prediction_method","weighting_method","holdout_method" (randomization vs. holdout),"distance",and "organism".  Example:  "-f file_type,test_method,asr_method specifies that files will be in the form: predict_traits--distance_exclusion--wagner.  Any unspecified values are set to "not_specified".  [default: %default]'),\
        make_option('-p','--pool_by',\
          default=False,help='pass comma-separated categories to pool results by those metadata categories. Valid categories are: holdout_method, prediction_method,weighting_method,distance and organism. For example, pass "distance" to output results pooled by holdout distance in addition to holdout method and prediction method  [default: %default]')
]
script_info['version'] = __version__


def iter_prediction_expectation_pairs(obs_dir_fp,exp_dir_fp,file_name_field_order,file_name_delimiter,verbose=False):
    """Iterate pairs of observed, expected biom file names"""
    input_files=sorted(listdir(obs_dir_fp))
Example #54
from scipy.optimize import basinhopping,brute,differential_evolution
from scipy.stats import norm

from numpy import diff,inf,all,array

#Set up script parameters
script_info = {}
script_info['brief_description'] = "This script fits time-series PCoA data to Ornstein-Uhlenbeck models [CURRENTLY DEMO ONLY]."
script_info['script_description'] = "This script fits microbiome change over time to Ornstein-Uhlenbeck (OU) models."
script_info['script_usage'] = [
                               ("","Demo fitting an OU model using default parameters.", "%prog -o ./simulation_results")
                                ]
script_info['output_description']= "Output is a tab-delimited data table of fitting results"
script_info['required_options'] = [
 make_option('-o','--output',type="new_filepath",help='the output folder for the simulation results')
]
script_info['optional_options'] = [\
    make_option('--fit_method',default="basinhopping",type="choice",choices=['basinhopping','differential_evolution','brute'],help="Global optimization_method to use [default:%default]")
   ]

script_info['version'] = __version__

def fit_OU_process(data,dts):
    """Return the parameters of an OU process over data
    
    Strategy: this method combines two tools: parametric
    fitting of normal distributions, and non-parametric 
    global optimization.

    1. generate a relativized version of all data,
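The docstring breaks off here. Its stated strategy (parametric normal fits combined with the global optimizers imported above) maps naturally onto maximizing the OU transition likelihood, since an OU process has Gaussian increments. A hedged sketch of that idea with invented parameter names follows; it is not the original fit_OU_process body.

from numpy import array, exp, log, pi
from scipy.optimize import basinhopping

def ou_neg_log_likelihood(params, x, dts):
    """Negative log-likelihood of an OU process for observations x,
    where dts[i] is the time between x[i] and x[i+1]."""
    lam, sigma, theta = params
    if lam <= 0 or sigma <= 0:
        return 1e12  # keep the optimizer in the valid parameter region
    nll = 0.0
    for x_prev, x_next, dt in zip(x[:-1], x[1:], dts):
        # OU transition density: Normal with exponentially decaying mean
        mean = theta + (x_prev - theta) * exp(-lam * dt)
        var = sigma ** 2 / (2.0 * lam) * (1.0 - exp(-2.0 * lam * dt))
        nll += 0.5 * (log(2.0 * pi * var) + (x_next - mean) ** 2 / var)
    return nll

def fit_ou_sketch(x, dts):
    """Fit (lambda, sigma, theta) by global optimization of the likelihood."""
    result = basinhopping(ou_neg_log_likelihood,
                          x0=array([1.0, 1.0, 0.0]),
                          minimizer_kwargs={'args': (x, dts)})
    return result.x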