def setUp(self):
    """Set up option objects and expected labels used by the tests."""

    self.headers = ['head1', 'head2', 'head3']
    self.test_option_file = make_option('-i', '--coord_fname',
            help='Input principal coordinates filepath',
            type='existing_path')
    self.test_option_colorby = make_option('-b', '--colorby', dest='colorby',
            help='Comma-separated list of metadata categories' +
            ' (column headers) [default=color by all]')
    self.test_option_custom_axes = make_option('-a', '--custom_axes',
            help='This is the category from the metadata mapping file' +
            ' [default: %default]')
    self.test_option_choice = make_option('-k', '--background_color',
            help='Background color to use in the plots. [default: %default]',
            default='black', type='choice', choices=['black', 'white'])
    self.test_option_float = make_option('--ellipsoid_opacity',
            help='Used only when plotting ellipsoids for jackknifed' +
            ' beta diversity (i.e., using a directory of coord files)' +
            ' [default=%default]',
            default=0.33, type=float)
    self.test_option_int = make_option('--n_taxa_keep',
            help='Used only when generating BiPlots. This is the number' +
            ' of taxa to display. Use -1 to display all. [default: %default]',
            default=10, type=int)
    self.test_option_true = make_option('--suppress_html_output',
            dest='suppress_html_output',
            default=False, action='store_true',
            help='Suppress HTML output. [default: %default]')
    self.test_option_false = make_option('--suppress_html_output',
            dest='suppress_html_output',
            default=True, action='store_false',
            help='Suppress HTML output. [default: %default]')

    self.option_labels = {'coord_fname': 'Principal coordinates filepath',
                          'colorby': 'Colorby category',
                          'background_color': 'Background color',
                          'ellipsoid_opacity': 'Ellipsoid opacity',
                          'n_taxa_keep': '# of taxa to keep',
                          'custom_axes': 'Custom Axis'}

    self.script_dir = get_qiime_scripts_dir()
    self.test_script_info = get_script_info(self.script_dir,
                                            'make_qiime_rst_file')
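# A minimal, self-contained sketch (not part of the tests above) of how options
# like these are parsed. It uses the standard library's optparse directly;
# QIIME's make_option adds custom types such as 'existing_path', so only stock
# int/float/choice types appear here.
from optparse import OptionParser, make_option as std_make_option

_options = [
    std_make_option('-k', '--background_color', type='choice',
                    choices=['black', 'white'], default='black'),
    std_make_option('--ellipsoid_opacity', type='float', default=0.33),
    std_make_option('--n_taxa_keep', type='int', default=10),
]
_parser = OptionParser(option_list=_options)
_opts, _args = _parser.parse_args(['--n_taxa_keep', '25', '-k', 'white'])
assert _opts.n_taxa_keep == 25 and _opts.background_color == 'white'
assert abs(_opts.ellipsoid_opacity - 0.33) < 1e-9   # default retained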
Example #2
script_info[
    'brief_description'] = """Filter OTU mapping file and sequences by SampleIDs"""
script_info[
    'script_description'] = """This filter allows for the removal of sequences and OTUs containing user-specified Sample IDs, for instance, the removal of negative control samples. This script identifies OTUs containing the specified Sample IDs and removes their corresponding sequences from the sequence collection."""
script_info['script_usage'] = []
script_info['script_usage'].append(
    ("""Example:""",
     """The following command can be used, where all options are passed (using the resulting OTU file from pick_otus.py, FASTA file from split_libraries.py and removal of sample 'PC.636') with the resulting data being written to the output directory "filtered_otus/":""",
     """%prog -i seqs_otus.txt -f seqs.fna -s PC.636 -o filtered_otus/"""))
script_info[
    'output_description'] = """As a result, new OTU and sequence files are generated and written to a randomly generated folder whose name starts with "filter_by_otus". Also included in the folder is another FASTA file containing the removed sequences, leaving the user with three files."""

script_info['required_options'] = [
    options_lookup['otu_map_as_primary_input'],
    options_lookup['input_fasta'],
    make_option('-s', '--samples_to_extract', type='string',
                help='This is a list of sample ids, which should be removed '
                     'from the OTU file')]

script_info['optional_options'] = [
    options_lookup['output_dir']
]

script_info['version'] = __version__


def main():
    """opens files as necessary based on prefs"""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    data = {}

    fasta_file = opts.input_fasta_fp
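# Hedged sketch of the filtering described in 'script_description' above (not
# the QIIME implementation): drop every OTU that contains a sequence from one
# of the specified sample IDs, and remember those sequence IDs so they can also
# be removed from the FASTA file. Assumes the tab-delimited OTU map format
# (cluster ID followed by member sequence IDs) and post-split_libraries
# identifiers of the form SampleID_SeqID.
def filter_otu_map_by_sample_ids(otu_map_lines, samples_to_discard):
    samples = set(samples_to_discard)
    kept_otus, removed_seq_ids = [], set()
    for line in otu_map_lines:
        fields = line.strip().split('\t')
        otu_id, seq_ids = fields[0], fields[1:]
        if any(seq_id.rsplit('_', 1)[0] in samples for seq_id in seq_ids):
            removed_seq_ids.update(seq_ids)   # whole OTU is discarded
        else:
            kept_otus.append((otu_id, seq_ids))
    return kept_otus, removed_seq_ids

_kept, _removed = filter_otu_map_by_sample_ids(
    ['0\tPC.636_1\tPC.355_2', '1\tPC.355_3'], ['PC.636'])
assert _kept == [('1', ['PC.355_3'])] and 'PC.355_2' in _removed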
Example #3
script_info={}
script_info['brief_description']="""Plot several PCoA files on the same 3D plot"""
script_info['script_description']="""This script generates a 3D plot comparing two or more sets of principal coordinates using as input two or more principal coordinates files. Edges are drawn in the plot connecting samples with the same ID across different principal coordinates files. The user can also include a file listing the edges to be drawn in the plot, in which case the user may submit any number of principal coordinates files (including one). If the user includes the edges file, the sample IDs need not match between principal coordinates files.

The principal coordinates files are obtained by applying "principal_coordinates.py" to a file containing beta diversity measures. The beta diversity files are obtained by applying "beta_diversity.py" to an OTU table. One may apply "transform_coordinate_matrices.py" to the principal coordinates files before using this script to compare them."""
script_info['script_usage']=[]
script_info['script_usage'].append(("Example 1","""Compare two pca/pcoa files in the same 3d plot where each sample ID is assigned its own color:""","""compare_3d_plots.py -i 'raw_pca_data1.txt,raw_pca_data2.txt'"""))
script_info['script_usage'].append(("Example 2","""Compare two pca/pcoa files in the same 3d plot with two coloring schemes (Day and Type):""","""compare_3d_plots.py -i 'raw_pca_data1.txt,raw_pca_data2.txt' -m input_map.txt -b 'Day,Type'"""))
script_info['script_usage'].append(("Example 3","""Compare two pca/pcoa files in the same 3d plot for a combination of label headers from a mapping file: ""","""compare_3d_plots.py -i 'raw_pca_data1.txt,raw_pca_data2.txt' -m input_map.txt -b 'Type&&Day' -o ./test/"""))
script_info['script_usage'].append(("Example 4","""Compare two pca/pcoa files in the same 3d plot for a combination of label headers from a mapping file: ""","""compare_3d_plots.py -i 'raw_pca_data1.txt,raw_pca_data2.txt' -m input_map.txt -b 'Type&&Day' -o ./test/"""))
script_info['script_usage'].append(("Example 5","""Pass in a list of desired edges and only one pca/pcoa file: ""","""compare_3d_plots.py -i 'raw_pca_data1.txt' -e edges.txt -m input_map.txt -b 'Type&&Day' -o ./test/"""))
script_info['script_usage'].append(("Example 6","""Pass in a list of desired edges and two pca/pcoa files: ""","""compare_3d_plots.py -i 'raw_pca_data1.txt,raw_pca_data2.txt' -e edges.txt -m input_map.txt -b 'Type&&Day' -o ./test/"""))
script_info['output_description']="""This script results in a folder containing an html file which displays the 3D Plots generated."""
script_info['required_options']= [\
    make_option('-i', '--coord_fnames',type='string',\
        help='This is comma-separated list of the paths to the principal \
coordinates files (i.e., resulting file \
from principal_coordinates.py), e.g \'pcoa1.txt,pcoa2.txt\''),
 make_option('-m', '--map_fname', dest='map_fname',type='existing_filepath', \
     help='This is the user-generated mapping file [default=%default]'),
]

script_info['optional_options']= [\
 make_option('-b', '--colorby', dest='colorby',type='string',\
     help='This is a list of the categories to color by in the plots from the \
user-generated mapping file. The categories must match the name of a column \
header in the mapping file exactly and multiple categories can be listed by comma \
separating them without spaces. The user can also combine columns in the \
mapping file by separating the categories by "&&" without spaces \
[default=%default]'),
 make_option('-a', '--custom_axes',type='string',help='This is a category or list of \
categories from the user-generated mapping file to use as a custom axis in the \
Example #4
centroids.fasta: The cluster representatives of each cluster

singletons.fasta: contains all unclustered reads

denoiser_mapping.txt: This file contains the actual clusters. The cluster centroid is given first,
                    the cluster members follow after the ':'.

checkpoints/ : directory with checkpoints

Note that the centroids and singleton files are disjoint. For most downstream analyses one wants to cat the two files.
"""

script_info['required_options'] = [

    make_option('-i', '--input_files', action='store',
                type='existing_filepaths', dest='sff_fps',
                help='path to flowgram files (.sff.txt), ' +
                'comma separated')
]
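# Minimal sketch: the output description above notes that centroids.fasta and
# singletons.fasta are disjoint and that most downstream analyses want them
# concatenated; this is one way to do that in Python (file names taken from the
# description above, not from the script itself).
def concatenate_fastas(input_fps, output_fp):
    with open(output_fp, 'w') as out:
        for fp in input_fps:
            with open(fp) as f:
                out.write(f.read())

# e.g. concatenate_fastas(['centroids.fasta', 'singletons.fasta'], 'denoised_seqs.fasta')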

script_info['optional_options'] = [

    make_option('-f', '--fasta_fp', action='store',
                type='string', dest='fasta_fp',
                help='path to fasta input file. ' +
                'Reads not in the fasta file are filtered out ' +
                'before denoising. File format is as produced by ' +
                'split_libraries.py ' +
                '[default: %default]',
                default=None),

    make_option('-o', '--output_dir', action='store',
Test-Statistic - the value of the test statistic for the given test
P - the raw P value returned by the given test.
FDR_P - the P value corrected by the Benjamini-Hochberg FDR procedure for
 multiple comparisons.
Bonferroni_P - the P value corrected by the Bonferroni procedure for multiple
 comparisons.
groupX_mean - there will be as many of these headers as there are unique values
 in the mapping file under the category passed with the -c option. Each of these
 fields will contain the mean frequency/abundance/count of the given OTU for the
 given sample group.
Taxonomy - this column will be present only if the biom table contained Taxonomy
 information. It will contain the taxonomy of the given OTU.
"""
script_info['required_options'] = [
    make_option('-i', '--otu_table_fp',
                help='path to biom format table',
                type='existing_path'),
    make_option('-m', '--mapping_fp', type='existing_filepath',
                help='path to category mapping file'),
    make_option('-c', '--category', type='string',
                help='name of the category over which to run the analysis'),
    make_option('-o', '--output_fp', type='new_filepath',
                help='path to the output file or directory')]
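# Hedged sketch of the two corrections behind the FDR_P and Bonferroni_P columns
# described above; this is standard statistics, not the QIIME implementation.
def bonferroni_correct(p_values):
    n = len(p_values)
    return [min(1.0, p * n) for p in p_values]

def benjamini_hochberg_correct(p_values):
    n = len(p_values)
    order = sorted(range(n), key=lambda i: p_values[i])
    adjusted = [0.0] * n
    running_min = 1.0
    for rank in range(n, 0, -1):          # largest p first, enforce monotonicity
        i = order[rank - 1]
        running_min = min(running_min, p_values[i] * n / rank)
        adjusted[i] = min(1.0, running_min)
    return adjusted

print(benjamini_hochberg_correct([0.01, 0.04, 0.03, 0.20]))   # [0.04, 0.0533..., 0.0533..., 0.2]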

script_info['optional_options'] = [
    make_option(
        '-s', '--test', type="choice", choices=GROUP_TEST_CHOICES.keys(),
        default='kruskal_wallis', help='Test to use. Choices are:\n%s' %
        (', '.join(GROUP_TEST_CHOICES.keys())) + '\n\t' + '[default: %default]'),
    make_option('--metadata_key', default='taxonomy', type=str,
                help='Key to extract metadata from biom table [default: %default]'),
    'brief_description'] = """Perform multiple subsamplings/rarefactions on an otu table"""
script_info[
    'script_description'] = """To perform bootstrap, jackknife, and rarefaction analyses, the otu table must be subsampled (rarefied).  This script rarefies, or subsamples, OTU tables.  This does not provide curves of diversity by number of sequences in a sample.  Rather it creates a series of subsampled OTU tables by random sampling (without replacement) of the input OTU table.  Samples that have fewer sequences than the requested rarefaction depth for a given output otu table are omitted from those output otu tables.  The pseudo-random number generator used for rarefaction by subsampling is NumPy's default - an implementation of the Mersenne twister PRNG."""
script_info['script_usage'] = []

script_info['script_usage'].append(
    ("""Generate rarefied OTU tables:""",
     """Generate rarefied OTU tables beginning with 10 (-m) sequences/sample through 140 (-x) sequences per sample in steps of 10 (-s), performing 2 iterations at each sampling depth (-n). All resulting OTU tables will be written to 'rarefied_otu_tables' (-o). Any sample containing fewer sequences in the input file than the requested number of sequences per sample is removed from the output rarefied otu table.""",
     """%prog -i otu_table.biom -m 10 -x 140 -s 10 -n 2 -o rarefied_otu_tables/"""))

script_info[
    'output_description'] = """The result of multiple_rarefactions.py consists of a number of biom files, which depend on the minimum/maximum number of sequences per sample, steps and iterations. The files have the same otu table format as the input otu_table.biom, and are named in the following way: rarefaction_100_0.biom, where "100" corresponds to the sequences per sample and "0" to the iteration."""
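# Minimal sketch of the operation described above for a single sample: subsample
# its OTU counts to a fixed depth without replacement using NumPy's PRNG. Not
# the QIIME implementation; samples shallower than the requested depth are
# skipped, as in the script description.
import numpy as np

def rarefy_counts(counts, depth, seed=None):
    counts = np.asarray(counts)
    if counts.sum() < depth:
        return None                                   # sample would be omitted
    rng = np.random.RandomState(seed)
    pool = np.repeat(np.arange(counts.size), counts)  # one entry per sequence
    keep = rng.choice(pool, size=depth, replace=False)
    return np.bincount(keep, minlength=counts.size)

print(rarefy_counts([5, 0, 3, 12], depth=10, seed=42))  # rarefied counts summing to 10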

script_info['required_options'] = [
    make_option('-i', '--input_path',
                help='Input OTU table filepath.',
                type='existing_filepath'),
    make_option('-o', '--output_path',
                help="Output directory.",
                type='new_dirpath'),
    make_option('-m', '--min', type='int',
                help='Minimum number of seqs/sample for rarefaction.'),
    make_option('-x', '--max', type='int',
                help='Maximum number of seqs/sample (inclusive) for rarefaction. '),
    make_option('-s', '--step', type='int',
                help='Size of each step between the min/max of' +
                ' seqs/sample (e.g. min, min+step... for level <= max).')
]
script_info['optional_options'] = [
    # This option is screwed up: dest should equal the long form parameter name,
    # but I'm not sure if we can do anything about it since '-' is not allowed
Example #7
script_info['script_usage'].append(("""""","""Run assignment with usearch using default parameters""","""%prog -i query_nt.fasta -r refseqs_pr.fasta"""))

script_info['script_usage'].append(("""""","""Run nucleotide versus protein BLAT using default parameters""","""%prog -i query_nt.fasta -r refseqs_pr.fasta -m blat"""))

script_info['script_usage'].append(("""""","""Run nucleotide versus protein BLAT using a stricter e-value threshold""","""%prog -i query_nt.fasta -r refseqs_pr.fasta -o blat_mapped_strict/ -e 1e-70  -m blat"""))

script_info['script_usage'].append(("""""","""Run nucleotide versus nucleotide BLAT with default parameters""","""%prog -i query_nt.fasta -r refseqs_nt.fasta -m blat-nt"""))

script_info['script_usage'].append(("""""","""Run assignment with bwa-short using default parameters. bwa-short is intended to be used for reads up to 200bp. WARNING: reference sequences must be dereplicated! No matches will be found to reference sequences which show up multiple times (even if their sequence identifiers are different)!""","""%prog -i query_nt.fasta -r refseqs_nt.fasta -m bwa-short"""))

script_info['script_usage'].append(("""""","""Run assignment with bwa-sw using default parameters.  WARNING: reference sequences must be dereplicated! No matches will be found to reference sequences which show up multiple times (even if their sequence identifiers are different)!""","""%prog -i query_nt.fasta -r refseqs_nt.fasta -m bwa-sw"""))

script_info['output_description'] = """ """

script_info['required_options'] = [
    make_option('-i', '--input_seqs_filepath',type='existing_filepath',
        help='Path to input sequences file'),

    make_option('-r', '--refseqs_fp',type='existing_filepath',
        help=('Path to reference sequences to search against [default: %default]')),
    ]

script_info['optional_options'] = [
    make_option('-m', '--assignment_method', type='choice',
        choices=assignment_functions.keys(), default = "usearch",
        help=('Method for picking OTUs.  Valid choices are: ' +\
              ', '.join(assignment_functions.keys()) +\
              '. [default: %default]')),

    make_option('-t', '--observation_metadata_fp',type='existing_filepath',
        help=('Path to observation metadata (e.g., taxonomy, EC, etc.). If an '
              'observation has multiple metadata entries (e.g., more than one '
from qiime.transform_coordinate_matrices import procrustes_monte_carlo,\
    get_procrustes_results

script_info={}
script_info['brief_description']="""Transform two or more coordinate matrices"""
script_info['script_description']="""This script transforms two or more coordinate matrices (e.g., the output of principal_coordinates.py) using procrustes analysis to minimize the distances between corresponding points. The first coordinate matrix provided is treated as the reference, and all other coordinate matrices are transformed to minimize distances to the reference points. Monte Carlo simulations can additionally be performed (-r random trials are run) to estimate the probability of seeing an M^2 value as extreme as the actual M^2."""
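# Illustrative sketch only: scipy's procrustes() returns the same kind of result
# described above (two standardized, superimposed matrices plus a disparity,
# i.e. an M^2-style sum of squared pointwise distances). The script itself uses
# get_procrustes_results from qiime.transform_coordinate_matrices, imported above.
import numpy as np
from scipy.spatial import procrustes

reference = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
other = reference[:, ::-1] * 2.0 + 5.0       # scaled, shifted, axes swapped
mtx1, mtx2, m_squared = procrustes(reference, other)
print(round(m_squared, 6))                   # ~0: the configurations match after transformation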
script_info['script_usage']=[]
script_info['script_usage'].append(("Write the transformed procrustes matrices to file","","""%prog -i unweighted_unifrac_pc.txt,weighted_unifrac_pc.txt -o procrustes_output"""))

script_info['script_usage'].append(("Generate transformed procrustes matrices and monte carlo p-values for two principal coordinate matrices","","""%prog -i unweighted_unifrac_pc.txt,weighted_unifrac_pc.txt -o mc_procrustes_output_2 -r 1000""",))
script_info['script_usage'].append(("Generate transformed procrustes matrices and monte carlo p-values for four principal coordinate matrices","","""%prog -i unweighted_unifrac_pc.txt,weighted_unifrac_pc.txt,euclidean_pc.txt,bray_curtis_pc.txt -o mc_procrustes_output_4 -r 1000""",))
script_info['script_usage'].append(("Generate transformed procrustes matrices and monte carlo p-values for three principal coordinate matrices where the sample ids must be mapped between matrices","","""%prog -i s1_pc.txt,s2_pc.txt,s3_pc.txt -s s1_s2_map.txt,s1_s3_map.txt -o mc_procrustes_output_3 -r 1000""",))

script_info['output_description']="""Two transformed coordinate matrices corresponding to the two input coordinate matrices, and (if -r was specified) a text file summarizing the results of the Monte Carlo simulations."""
script_info['required_options']=[
 make_option('-i','--input_fps',type='existing_filepaths',
             help='comma-separated list of input coordinate matrices'),
 make_option('-o','--output_dir',type='new_dirpath',
             help='the output directory'),
]
script_info['optional_options']=[
 make_option('-r','--random_trials',type='int',
    help='Number of random permutations of matrix2 to perform. '+
    ' [default: (no Monte Carlo analysis performed)]',default=None),
 make_option('-d','--num_dimensions',type='int',default=3,
    help='Number of dimensions to include in output matrices'+
    ' [default: %default]'),
 make_option('-s','--sample_id_map_fps',
    type='existing_filepaths',
    help='If sample id maps are provided, there must be exactly one fewer files here than there are coordinate matrices (as each nth sample id map will provide the mapping from the first input coordinate matrix to the n+1th coordinate matrix) [default: %default]',
    default=None),
 make_option('--store_trial_details',
Example #9
script_info[
    'brief_description'] = "Starts parallel jobs on Sun GridEngine queueing systems."
script_info[
    'script_description'] = "Starts multiple jobs in parallel on Sun GridEngine systems. This is designed to work with StarCluster EC2 instances, but may be applicable elsewhere."
script_info['script_usage'] = [
    ("Job submission example",
     "Start each command listed in test_jobs.txt in parallel. The run ID for these jobs will be RUNID.",
     "%prog -ms test_jobs.txt RUNID"),
    ("Queue specification example",
     "Submit the commands listed in test_jobs.txt to the specified queue.",
     "%prog -ms test_jobs.txt -q all.q RUNID")
]
script_info['output_description'] = "No output is created."
script_info['required_options'] = []
script_info['optional_options'] = [
    make_option('-m',
                '--make_jobs',
                action='store_true',
                help='make the job files [default: %default]'),
    make_option('-s',
                '--submit_jobs',
                action='store_true',
                help='submit the job files [default: %default]'),
    make_option('-q',
                '--queue_name',
                default=qiime_config['sc_queue'],
                help='the queue to submit jobs to [default: %default]')
]
script_info['version'] = __version__
script_info['disallow_positional_arguments'] = False

# qsub template
QSUB_TEXT = """#!/bin/bash
Example #10
script_info['script_usage'].append(("Sequence list filtering", "Keep all sequences from a fasta file that are listed in a text file.",
 "%prog -f inseqs.fasta -o list_filtered_seqs.fasta -s seqs_to_keep.txt"))
 
script_info['script_usage'].append(("biom-based filtering", "Keep all sequences that are listed as observations in a biom file.",
 "%prog -f inseqs.fastq -o biom_filtered_seqs.fastq -b otu_table.biom"))

script_info['script_usage'].append(("fastq filtering","Keep all sequences from a fastq file that are listed in a text file (note: file name must end with .fastq to support fastq filtering).",
 "%prog -f inseqs.fastq -o list_filtered_seqs.fastq -s seqs_to_keep.txt"))
 
script_info['script_usage'].append(("sample id list filtering","Keep all sequences from a fasta file where the sample id portion of the sequence identifier is listed in a text file (sequence identifiers in fasta file must be in post-split libraries format: sampleID_seqID).",
 "%prog -f sl_inseqs.fasta -o sample_id_list_filtered_seqs.fasta --sample_id_fp map.txt"))

script_info['output_description']= ""
script_info['required_options'] = [\
 options_lookup['input_fasta'],
 make_option('-o','--output_fasta_fp',type='new_filepath',help='the output fasta filepath')
]
script_info['optional_options'] = [\
 make_option('-m','--otu_map',type='existing_filepath',
  help='an OTU map where sequence ids are those which should be retained'),\
 make_option('-s','--seq_id_fp',type='existing_filepath', 
  help='A list of sequence identifiers (or tab-delimited lines with'
  ' a seq identifier in the first field) which should be retained'),\
 make_option('-b','--biom_fp',type='existing_filepath', 
  help='A biom file where otu identifiers should be retained'),\
 make_option('-a','--subject_fasta_fp',type='existing_filepath',
  help='A fasta file where the seq ids should be retained.'),\
 make_option('-p','--seq_id_prefix',type='string',
  help='keep seqs where seq_id starts with this prefix'),\
 make_option('--sample_id_fp',type='existing_filepath',
  help='keep seqs where seq_id starts with a sample id listed in this file'),\
Example #11
In more technical language: OTUs and samples are designated as two types of nodes in a bipartite network in which OTU-nodes are connected via edges to sample-nodes in which their sequences are found. Edge weights are defined as the number of sequences in an OTU. To cluster the OTUs and samples in the network, a stochastic spring-embedded algorithm is used, where nodes act like physical objects that repel each other, and connections act as springs with a spring constant and a resting length: the nodes are organized in a way that minimizes forces in the network. These algorithms are implemented in Cytoscape (Shannon et al., 2003)."""
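# Minimal sketch of the bipartite OTU/sample network described above, built with
# networkx purely for illustration (make_otu_network.py itself writes Cytoscape
# node/edge files). Edge weight = number of sequences of that OTU in that sample.
import networkx as nx

otu_counts = {                      # toy OTU table: OTU -> {sample: count}
    'OTU_1': {'S1': 10, 'S2': 3},
    'OTU_2': {'S2': 7},
}
g = nx.Graph()
for otu, per_sample in otu_counts.items():
    g.add_node(otu, kind='otu')
    for sample, count in per_sample.items():
        if count > 0:
            g.add_node(sample, kind='sample')
            g.add_edge(otu, sample, weight=count)
print(g['OTU_1']['S2']['weight'])   # 3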

script_info['script_usage'] = []

script_info['script_usage'].append((
    """Example:""",
    """Create network cytoscape and statistic files in a user-specified output directory. This example uses an OTU table (-i) and the metadata mapping file (-m), and the results are written to the "otu_network/" folder.""",
    """%prog -i otu_table.biom -m Fasting_Map.txt -o otu_network"""))

script_info[
    'output_description'] = """The result of make_otu_network.py consists of a folder which contains edge and node files to be loaded into cytoscape along with props files labeled by category, which can be used for coloring."""

script_info['required_options'] = [
    make_option('-i',
                '--input_fp',
                type='existing_filepath',
                help='name of otu table file in biom format [REQUIRED]'),
    # note that the options list gets passed around, so it is required that
    # the option be called --map_fname for this value - an annoying name
    # but not refactoring now...
    make_option('-m',
                '--map_fname',
                type='existing_filepath',
                help='name of input map file [REQUIRED]'),
    make_option('-o',
                '--output_dir',
                type='new_dirpath',
                help='output directory for all analyses [REQUIRED]')
]

script_info['optional_options'] = [
Generates a tsv stats file and pdf of boxplots for each input category.
Each row in the tsv file corresponds to a comparison between two groups of treatment values,
and includes the means and standard deviations of the two groups' alpha diversities,
along with the results of the two-sample t-test.
"""

script_info[
    'script_usage_output_to_remove'] = [
    '$PWD/PD_dmax_parametric.txt',
    '$PWD/PD_d100_parametric.txt',
    '$PWD/PD_d100.txt']
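# Hedged sketch of one row of the tsv described above: group means, standard
# deviations, and a two-sample t-test (scipy is used here for illustration only,
# with made-up alpha diversity values).
import numpy as np
from scipy import stats

group_a = np.array([4.2, 3.9, 4.5, 4.1])   # alpha diversity, treatment group A
group_b = np.array([3.1, 2.8, 3.3, 3.0])   # alpha diversity, treatment group B
t_stat, p_value = stats.ttest_ind(group_a, group_b)
row = (group_a.mean(), group_a.std(ddof=1),
       group_b.mean(), group_b.std(ddof=1), t_stat, p_value)
print('\t'.join('%0.4g' % v for v in row))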

script_info['required_options'] = [
    make_option('-i',
                '--alpha_diversity_fp',
                action='store',
                type='existing_filepath',
                help='path to collated alpha diversity file (as generated by '
                'collate_alpha.py) [REQUIRED]'),
    make_option('-m',
                '--mapping_fp',
                action='store',
                type='existing_filepath',
                help='path to the mapping file [REQUIRED]'),
    make_option('-c',
                '--categories',
                action='store',
                type='string',
                help='comma-separated list of categories for comparison [REQUIRED]'),
    make_option('-o',
                '--output_dir',
                action='store',
Example #13
    """Create the file outseqs.fasta (-o), which will be a subset of inseqs.fasta (-i) containing only the sequences THAT ARE NOT (-n) associated with sample ids S2, S3, S4 (-s). As always, sample IDs are case-sensitive:""",
    """%prog -i inseqs.fasta -o outseqs_by_sample_negated.fasta -s S2,S3,S4 -n"""
))

script_info['script_usage'].append((
    """""",
    """Create the file outseqs.fasta (-o), which will be a subset of inseqs.fasta (-i) containing only the sequences THAT ARE associated with sample ids whose "Treatment" value is "Fast" in the mapping file:""",
    """%prog -i inseqs.fasta -o outseqs_by_mapping_field.fasta -m map.txt -s "Treatment:Fast" """
))

script_info[
    'output_description'] = """The script produces a fasta file containing only the specified SampleIDs."""

script_info['required_options'] = [
    options_lookup['fasta_as_primary_input'],
    make_option('-o', '--output_fasta_fp', help='the output fasta file')
]

script_info['optional_options']=[
    make_option('-n','--negate',action='store_true',default=False,
                help='negate the sample ID list (i.e., output sample '+
                'ids not passed via -s) [default: %default]'),
    make_option('-s','--sample_ids',type='string',\
  help="comma-separated sample_ids to include in output fasta file"+\
  " (or exclude if --negate), or string describing mapping file states"+\
  " defining sample ids (mapping_fp must be provided for the latter)"),
    options_lookup['mapping_fp']]
script_info['version'] = __version__


def main():
Example #14
- P: the raw P value returned by the given test.
- FDR_P: the P value corrected by the Benjamini-Hochberg FDR procedure for
  multiple comparisons.
- Bonferroni_P: the P value corrected by the Bonferroni procedure for multiple
  comparisons.
- groupX_mean: there will be as many of these headers as there are unique values
  in the mapping file under the category passed with the -c option. Each of these
  fields will contain the mean frequency/abundance/count of the given OTU for the
  given sample group.
- Taxonomy: this column will be present only if the biom table contained Taxonomy
  information. It will contain the taxonomy of the given OTU.

"""
script_info['required_options'] = [
    make_option('-i',
                '--otu_table_fp',
                help='path to biom format table',
                type='existing_path'),
    make_option('-m',
                '--mapping_fp',
                type='existing_filepath',
                help='path to category mapping file'),
    make_option('-c',
                '--category',
                type='string',
                help='name of the category over which to run the analysis'),
    make_option('-o',
                '--output_fp',
                type='new_filepath',
                help='path to the output file')
]
script_info[
    'script_description'] = """This script performs like the assign_taxonomy.py script, but is intended to make use of multicore/multiprocessor environments to perform analyses in parallel."""

script_info['script_usage'] = []
script_info['script_usage'].append((
    """Example""",
    """Assign taxonomy to all sequences in the input file (-i) using BLAST with the id to taxonomy mapping file (-t) and reference sequences file (-r), and write the results (-o) to $PWD/blast_assigned_taxonomy/. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/).""",
    """%prog -i $PWD/inseqs.fasta -t $PWD/id_to_tax.txt -r $PWD/refseqs.fasta -o $PWD/blast_assigned_taxonomy/"""
))

script_info[
    'output_description'] = """Mapping of sequence identifiers to taxonomy and quality scores."""

script_info['required_options'] = [
    make_option('-i',
                '--input_fasta_fp',
                type='existing_filepath',
                help='full path to ' + 'input_fasta_fp [REQUIRED]'),
    make_option('-o',
                '--output_dir',
                action='store',
                type='new_dirpath',
                help='full path to store output files ' + '[REQUIRED]')
]

script_info['optional_options'] = [
    make_option(
        '-r',
        '--reference_seqs_fp',
        type='existing_filepath',
        help='Ref seqs to blast against.  Must provide either --blast_db or '
        '--reference_seqs_db for assignment with blast [default: %s]' %
Example #16
]

script_info['output_description'] = """
prefix_dereplicated.sff.txt: human readable sff file containing the flowgram of the
                             cluster representative of each cluster.

prefix_dereplicated.fasta: Fasta file containing the cluster representative of each cluster.

prefix_mapping.txt: This file contains the actual clusters. The cluster centroid is given first,
                    the cluster members follow after the ':'.
"""

script_info['required_options'] = [

    make_option('-i', '--input_files', action='store',
                type='existing_filepaths', dest='sff_fps',
                help='path to flowgram files (.sff.txt), ' +
                'comma separated')
]

script_info['optional_options'] = [
    make_option('-f', '--fasta_file', action='store', type='string',
                dest='fasta_fp', help='path to fasta input file ' +
                '[default: %default]', default=None),

    make_option('-s', '--squeeze', action='store_true', dest='squeeze',
                help='Use run-length encoding for prefix ' +
                'filtering [default: %default]', default=False),

    make_option('-l', '--log_file', action='store',
                type='string', dest='log_fp', help='path to log file ' +
                '[default: %default]', default="preprocess.log"),
script_info = {}
script_info[
    'brief_description'] = "Split a single post-split_libraries.py fasta file into per-sample fasta files."
script_info[
    'script_description'] = "Split a single post-split_libraries.py fasta file into per-sample fasta files. This script requires that the sequence identifiers are in post-split_libraries.py format (i.e., SampleID_SeqID). A fasta file will be created for each unique SampleID."
script_info['script_usage'] = [(
    "",
    "Split seqs.fna into one fasta file per sample and store the resulting fasta files in 'out'",
    "%prog -i seqs.fna -o out/")]
script_info['script_usage_output_to_remove'] = ['$PWD/out/']
script_info[
    'output_description'] = "This script will produce an output directory with as many files as samples."
script_info['required_options'] = [
    make_option(
        '-i',
        '--input_fasta_fp',
        type="existing_filepath",
        help='the input fasta file to split'),
    make_option(
        '-o',
        '--output_dir',
        type="new_dirpath",
        help='the output directory [default: %default]'),
]
script_info['optional_options'] = [
    make_option('--buffer_size', type="int", default=500,
                help="the number of sequences to read into memory before writing to file (you usually won't need to change this) [default: %default]"),
]
script_info['version'] = __version__

Example #18
script_info['script_usage_output_to_remove'] = ['bdiv_jk100']

script_info[
    'output_description'] = """This script results in several distance\
 matrices (from beta_diversity.py), several rarefied otu tables\
 (from multiple_rarefactions.py), several UPGMA trees (from upgma_cluster.py),\
 a supporting file and newick tree with support values (from tree_compare.py),\
 and 2D and 3D PCoA plots."""

qiime_config = load_qiime_config()
options_lookup = get_options_lookup()

script_info['required_options'] = [
    make_option('-i',
                '--otu_table_fp',
                type='existing_filepath',
                help='the input OTU table in biom format [REQUIRED]'),
    make_option('-o',
                '--output_dir',
                type='new_dirpath',
                help='the output directory [REQUIRED]'),
    make_option(
        '-e',
        '--seqs_per_sample',
        type='int',
        help='number of sequences to include in each jackknifed subset' +
        ' [REQUIRED]'),
    make_option('-m',
                '--mapping_fp',
                type='existing_filepath',
                help='path to the mapping file [REQUIRED]'),
Example #19
script_info['script_description']="""The summarize_taxa.py script provides summary information of the representation of taxonomic groups within each sample. It takes an OTU table that contains taxonomic information as input. The taxonomic level for which the summary information is provided is designated with the -L option. The meaning of this level will depend on the format of the taxon strings that are returned from the taxonomy assignment step. The taxonomy strings that are most useful are those that standardize the taxonomic level with the depth in the taxonomic strings. For instance, for the RDP classifier taxonomy, Level 2 = Domain (e.g. Bacteria), 3 = Phylum (e.g. Firmicutes), 4 = Class (e.g. Clostridia), 5 = Order (e.g. Clostridiales), 6 = Family (e.g. Clostridiaceae), and 7 = Genus (e.g. Clostridium). By default, the relative abundance of each taxonomic group will be reported, but the raw counts can be returned if -a is passed.

By default, taxa summary tables will be output in both classic (tab-separated) and BIOM formats. The BIOM-formatted taxa summary tables can be used as input to other QIIME scripts that accept BIOM files.
"""
script_info['script_usage']=[]
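# Minimal sketch of the collapse described in the script description above (not
# the QIIME implementation): sum OTU counts per sample after truncating each
# taxonomy string to a chosen depth, then optionally convert to relative
# abundances. The table layout and taxonomy lists are toy examples; the depth
# here is simply the number of leading ranks kept and is not tied to the -L
# numbering described above.
from collections import defaultdict

def summarize_taxa_sketch(otu_counts, taxonomy, depth, relative=True):
    summed = defaultdict(lambda: defaultdict(float))
    for otu, per_sample in otu_counts.items():
        taxon = ';'.join(taxonomy[otu][:depth])
        for sample, count in per_sample.items():
            summed[taxon][sample] += count
    if relative:
        totals = defaultdict(float)
        for per_sample in summed.values():
            for sample, count in per_sample.items():
                totals[sample] += count
        for per_sample in summed.values():
            for sample in per_sample:
                per_sample[sample] /= totals[sample]
    return summed

taxonomy = {'otu1': ['Bacteria', 'Firmicutes'], 'otu2': ['Bacteria', 'Bacteroidetes']}
counts = {'otu1': {'S1': 6}, 'otu2': {'S1': 2}}
print(dict(summarize_taxa_sketch(counts, taxonomy, depth=2)['Bacteria;Firmicutes']))  # {'S1': 0.75}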

script_info['script_usage'].append(("""Examples:""","""Summarize taxa based at taxonomic levels 2, 3, 4, 5, and 6, and write resulting taxa tables to the directory "./tax" ""","""%prog -i otu_table.biom -o ./tax"""))

script_info['script_usage'].append(("""Examples:""","""Summarize taxa based at taxonomic levels 2, 3, 4, 5, and 6, and write resulting mapping files to the directory "./tax" ""","""%prog -i otu_table.biom -o tax_mapping/ -m Fasting_Map.txt"""))

script_info['output_description']="""There are two possible output formats depending on whether or not a mapping file is provided with the -m option. If a mapping file is not provided, a table is returned where the taxonomic groups are each in a row and there is a column for each sample. If a mapping file is provided, the summary information will be appended to this file. Specifically, a new column will be made for each taxonomic group to which the relative abundances or raw counts will be added to the existing rows for each sample. The addition of the taxonomic information to the mapping file allows for taxonomic coloration of Principal coordinates plots in the 3d viewer. As described in the make_emperor.py section, principal coordinates plots can be dynamically colored based on any of the metadata columns in the mapping file. Dynamic coloration of the plots by the relative abundances of each taxonomic group can help to distinguish which taxonomic groups are driving the clustering patterns.
"""

script_info['required_options']= [\
    make_option('-i','--otu_table_fp', dest='otu_table_fp',
        help='Input OTU table filepath [REQUIRED]',
        type='existing_filepath'),
]
script_info['optional_options'] = [\
    make_option('-L','--level',default='2,3,4,5,6' , type='string',
        help='Taxonomic level to summarize by. [default: %default]'),
    make_option('-m','--mapping', 
        help='Input metadata mapping filepath. If supplied, then the taxon' +\
        ' information will be added to this file. This option is ' +\
        ' useful for coloring PCoA plots by taxon abundance or to ' +\
        ' perform statistical tests of taxon/mapping associations.',
        type='existing_filepath'),
    make_option('--md_identifier',default='taxonomy', type='string',
             help='the relevant observation metadata key [default: %default]'),
    make_option('--md_as_string',default=False,action='store_true',
             help='metadata is included as string [default: metadata is included as list]'),
    'brief_description'] = """Get the reverse complement of all sequences"""
script_info[
    'script_description'] = """Write the reverse complement of all seqs in seqs.fasta (-i) to seqs_rc.fasta (default, change output_fp with -o). Each sequence description line will have ' RC' appended to the end of it (default,
leave sequence description lines untouched by passing -r):"""
script_info['script_usage'] = []
script_info['script_usage'].append((
    """Example:""",
    """Reverse complement all sequences in seqs.fna and write result to seqs_rc.fna""",
    """%prog -i seqs.fna"""))
script_info['output_description'] = """"""
script_info['required_options'] = [options_lookup['fasta_as_primary_input']]
script_info['optional_options'] = [
    options_lookup['output_fp'],
    make_option('-r',
                '--retain_seq_id',
                action='store_true',
                help='leave seq description lines untouched' +
                ' [default: append " RC" to seq description lines]')
]
script_info['version'] = __version__


def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose

    input_fasta_fp = opts.input_fasta_fp
    output_fp = opts.output_fp
    retain_seq_id = opts.retain_seq_id
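# Hedged sketch of the core operation of the script above: reverse-complement
# each sequence and append " RC" to its description line unless retain_seq_id
# is set. Only unambiguous DNA bases plus N are handled here; not the QIIME code.
COMPLEMENT = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G', 'N': 'N'}

def reverse_complement_records(records, retain_seq_id=False):
    for label, seq in records:
        rc = ''.join(COMPLEMENT[base] for base in reversed(seq.upper()))
        yield (label if retain_seq_id else label + ' RC'), rc

for label, seq in reverse_complement_records([('seq1 sample A', 'ACCGTN')]):
    print('>%s\n%s' % (label, seq))        # >seq1 sample A RC / NACGGT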
""", """%prog -i otu_table.biom -o bdiv_jk100 -e 100 -m Fasting_Map.txt\
 -t rep_set.tre"""))

script_info['script_usage_output_to_remove'] = ['bdiv_jk100']

script_info['output_description'] = """This script results in several distance\
 matrices (from beta_diversity.py), several rarefied OTU tables\
 (from multiple_rarefactions_even_depth.py), several UPGMA trees (from upgma_cluster.py),\
 a supporting file and newick tree with support values (from tree_compare.py),\
 and Emperor PCoA plots."""

qiime_config = load_qiime_config()
options_lookup = get_options_lookup()

script_info['required_options'] = [
    make_option('-i', '--otu_table_fp', type='existing_filepath',
                help='the input OTU table in biom format [REQUIRED]'),
    make_option('-o', '--output_dir', type='new_dirpath',
                help='the output directory [REQUIRED]'),
    make_option('-e', '--seqs_per_sample', type='int',
                help='number of sequences to include in each jackknifed subset' +
                ' [REQUIRED]'),
    make_option('-m', '--mapping_fp', type='existing_filepath',
                help='path to the mapping file [REQUIRED]'),
]

script_info['optional_options'] = [
    make_option('-t', '--tree_fp', type='existing_filepath',
                help='path to the tree file [default: %default; ' +
                'REQUIRED for phylogenetic measures]'),
    make_option('-p', '--parameter_fp', type='existing_filepath',
                help='path to the parameter file, which specifies changes' +
 like /home/ubuntu/my_analysis/).""",
     """%prog -i $PWD/rarefied_otu_tables -o $PWD/adiv\
 -m observed_otus,chao1,PD_whole_tree -t $PWD/rep_set.tre"""))

script_info['output_description'] = """The resulting output will be the same\
 number of files as supplied by the user. The resulting files are tab-delimited\
 text files, where the columns correspond to alpha diversity metrics and the\
 rows correspond to samples and their calculated diversity measurements. """

script_info['version'] = __version__

options_lookup = get_options_lookup()

script_info['required_options'] = [
    make_option('-i',
                '--input_path',
                type='existing_dirpath',
                help='input path, must be directory [REQUIRED]'),
    make_option('-o',
                '--output_path',
                type='new_dirpath',
                help='output path, must be directory [REQUIRED]'),
]

script_info['optional_options'] = [
    make_option(
        '-t',
        '--tree_path',
        type='existing_filepath',
        help='path to newick tree file, required for phylogenetic metrics' +
        ' [default: %default]'),
    make_option('-m',
     "Execute the analysis of volatility using the first difference method, "
     "grouping the samples using the 'Treatment' category, sorting them using "
     "the 'time' category and calculating the trajectory using the first "
     "four axes",
     "%prog -i pcoa_res.txt -m map.txt -c 'Treatment' --algorithm diff "
     "-o diff_output -s time --axes 4"),
    ("Window difference method",
     "Execute the analysis of volatility using the window difference method, "
     "grouping the samples using the 'Treatment' category, sorting them using "
     "the 'time' category, weighting the output by the space between "
     "samples in the 'time' category and using a window size of three.",
     "%prog -i pcoa_res.txt -m map.txt -c 'Treatment' --algorithm wdiff "
     "-o wdiff_output -s time --window_size 3 -w")
]
script_info['required_options'] = [
    make_option('-i', '--input_fp', type='existing_filepath',
                help="Input ordination results filepath"),
    make_option('-m', '--map_fp', type='existing_filepath',
                help="Input metadata mapping filepath"),
    make_option('-c', '--categories', type='str',
                help="Comma-separated list of category names of the mapping "
                     "file to use to create the trajectories"),
    make_option('-o', '--output_dir', type='new_dirpath',
                help="Name of the output directory to save the results")
]
script_info['optional_options'] = [
    make_option('-s', '--sort_by', type='str', default=None,
                help="Category name of the mapping file to use to sort"),
    make_option('--algorithm', type='choice', default='avg',
                choices=TRAJECTORY_ALGORITHMS,
                help="The algorithm to use. Available methods: "
                     + str(TRAJECTORY_ALGORITHMS) + ". [Default: %default]"),
__email__ = "*****@*****.**"
 

from cogent.parse.fasta import MinimalFastaParser
from qiime.util import (parse_command_line_parameters, 
                        make_option, 
                        split_fasta_on_sample_ids_to_files)

script_info = {}
script_info['brief_description'] = "Split a single post-split_libraries.py fasta file into per-sample fasta files."
script_info['script_description'] = "Split a single post-split_libraries.py fasta file into per-sample fasta files. This script requires that the sequence identifiers are in post-split_libraries.py format (i.e., SampleID_SeqID). A fasta file will be created for each unique SampleID."
script_info['script_usage'] = [("","Split seqs.fna into one fasta file per sample and store the resulting fasta files in 'out'","%prog -i seqs.fna -o out/")]
script_info['script_usage_output_to_remove'] = ['$PWD/out/']
script_info['output_description']= "This script will produce an output directory with as many files as samples."
script_info['required_options'] = [
 make_option('-i','--input_fasta_fp',type="existing_filepath",help='the input fasta file to split'),
 make_option('-o','--output_dir',type="new_dirpath",help='the output directory [default: %default]'),\
]
script_info['optional_options'] = [\
 make_option('--buffer_size',type="int",default=500,
 help="the number of sequences to read into memory before writing to file (you usually won't need to change this) [default: %default]"),\
]
script_info['version'] = __version__



def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
    split_fasta_on_sample_ids_to_files(MinimalFastaParser(open(opts.input_fasta_fp,'U')),
Example #25
                            "%prog -f inseqs.fastq -o biom_filtered_seqs.fastq -b otu_table.biom"))

script_info[
    'script_usage'].append(("fastq filtering", "Keep all sequences from a fastq file that are listed in a text file (note: file name must end with .fastq to support fastq filtering).",
                            "%prog -f inseqs.fastq -o list_filtered_seqs.fastq -s seqs_to_keep.txt"))

script_info[
    'script_usage'].append(("sample id list filtering", "Keep all sequences from a fasta file where the sample id portion of the sequence identifier is listed in a text file (sequence identifiers in fasta file must be in post-split libraries format: sampleID_seqID).",
                            "%prog -f sl_inseqs.fasta -o sample_id_list_filtered_seqs.fasta --sample_id_fp map.txt"))

script_info['output_description'] = ""
script_info['required_options'] = [
    options_lookup['input_fasta'],
    make_option(
        '-o',
        '--output_fasta_fp',
        type='new_filepath',
        help='the output fasta filepath')
]
script_info['optional_options'] = [
    make_option('-m', '--otu_map', type='existing_filepath',
                help='an OTU map where sequence ids are those which should be retained'),
    make_option('-s', '--seq_id_fp', type='existing_filepath',
                help='A list of sequence identifiers (or tab-delimited lines with'
                ' a seq identifier in the first field) which should be retained'),
    make_option('-b', '--biom_fp', type='existing_filepath',
                help='A biom file where otu identifiers should be retained'),
    make_option('-a', '--subject_fasta_fp', type='existing_filepath',
                help='A fasta file where the seq ids should be retained.'),
    make_option('-p', '--seq_id_prefix', type='string',
                help='keep seqs where seq_id starts with this prefix'),
Example #26
script_info['script_usage'].append(
    ("Example 1", """Print qiime config settings:""",
     """print_qiime_config.py"""))
script_info['script_usage'].append(
    ("Example 2", """Print and check qiime config settings for sanity:""",
     """print_qiime_config.py -t"""))

script_info[
    'output_description'] = """This prints the qiime_config to stdout."""
script_info['version'] = __version__
script_info['help_on_no_arguments'] = False
script_info['required_options'] = []
script_info['optional_options'] = [
    make_option(
        '-t',
        '--test',
        action='store_true',
        default=False,
        help='Test the QIIME install and configuration [default: %default]'),
    make_option(
        '-b',
        '--qiime_base_install',
        action='store_true',
        default=False,
        help=
        'If passed, report only on dependencies required for the QIIME base install [default: %default]'
    ),
    make_option('--haiku',
                action='store_true',
                default=False,
                help=SUPPRESS_HELP)
]
from qiime.util import get_tmp_filename
from cogent.app.formatdb import build_blast_db_from_fasta_path

qiime_config = load_qiime_config()
options_lookup = get_options_lookup()

script_info={}
script_info['brief_description']="""Parallel pick otus using BLAST"""
script_info['script_description']="""This script performs like the pick_otus.py script, but is intended to make use of multicore/multiprocessor environments to perform analyses in parallel."""
script_info['script_usage']=[]
script_info['script_usage'].append(("""Example""","""Pick OTUs by blasting /home/qiime_user/inseqs.fasta against /home/qiime_user/refseqs.fasta and write the output to the /home/qiime_user/out/ directory.""","""%prog -i /home/qiime_user/inseqs.fasta -r /home/qiime_user/refseqs.fasta -o /home/qiime_user/out/"""))
script_info['output_description']="""The output consists of two files (i.e. seqs_otus.txt and seqs_otus.log). The .txt file is composed of tab-delimited lines, where the first field on each line corresponds to an (arbitrary) cluster identifier, and the remaining fields correspond to sequence identifiers assigned to that cluster. Sequence identifiers correspond to those provided in the input FASTA file. The resulting .log file contains a list of parameters passed to this script along with the output location of the resulting .txt file."""

script_info['required_options'] = [\
    make_option('-i','--input_fasta_fp',action='store',\
           type='string',help='full path to '+\
           'input_fasta_fp'),\
    make_option('-o','--output_dir',action='store',\
           type='string',help='path to store output files')\
]

script_info['optional_options'] = [\
    make_option('-e','--max_e_value',\
          help='Max E-value '+\
          '[default: %default]', default='1e-10'),\
         
    make_option('-s','--similarity',action='store',\
          type='float',help='Sequence similarity '+\
          'threshold [default: %default]',default=0.97),\
          
    make_option('-r','--refseqs_fp',action='store',\
Example #28
 three dimensions in your file. Other combinations can be viewed using the\
 "Views:Choose viewing axes" option in the KiNG viewer (Chen, Davis,\
 & Richardson, 2009), which may require the installation of kinemage software.\
 The first 10 components can be viewed using "Views:Parallel coordinates"\
 option or typing "/". The mouse can be used to modify display parameters, to\
 click and rotate the viewing axes, to select specific points (clicking on a\
 point shows the sample identity in the lower left corner), or to select\
 different analyses (upper right window). Although samples are most easily\
 viewed in 2D, the third dimension is indicated by coloring each sample\
 (dot/label) along a gradient corresponding to the depth along the third\
 component (bright colors indicate points close to the viewer)."""

script_info['required_options']=[\
    make_option('-i', '--coord_fname',
        help='Input principal coordinates filepath (i.e.,' +\
        ' resulting file from principal_coordinates.py).  Alternatively,' +\
        ' a directory containing multiple principal coordinates files for' +\
        ' jackknifed PCoA results.',
        type='existing_path'),
    make_option('-m', '--map_fname', dest='map_fname',
        help='Input metadata mapping filepath',
        type='existing_filepath')
    ]
script_info['optional_options']=[\
    make_option('-b', '--colorby', dest='colorby', type='string',\
        help='Comma-separated list of metadata categories' +\
        ' (column headers) ' +\
        'to color by in the plots. The categories must match the name of a ' +\
        'column header in the mapping file exactly. Multiple categories ' +\
        'can be listed by comma separating them without spaces. The user can ' +\
        'also combine columns in the mapping file by separating the ' +\
        'categories by "&&" without spaces. [default=color by all]'),
from qiime.format import format_biom_table
from qiime.filter import get_otu_ids_from_taxonomy_f
from qiime.util import parse_command_line_parameters, make_option

script_info = {}
script_info['brief_description'] = "Filter taxa from an OTU table"
script_info['script_description'] = "This scripts filters an OTU table based on taxonomic metadata. It can be applied for positive filtering (i.e., keeping only certain taxa), negative filtering (i.e., discarding only certain taxa), or both at the same time."
script_info['script_usage'] = []
script_info['script_usage'].append(("","Filter otu_table.biom to include only OTUs identified as p__Bacteroidetes or p__Firmicutes.","%prog -i otu_table.biom -o otu_table_bac_firm_only.biom -p p__Bacteroidetes,p__Firmicutes"))
script_info['script_usage'].append(("","Filter otu_table.biom to exclude OTUs identified as p__Bacteroidetes or p__Firmicutes.","%prog -i otu_table.biom -o otu_table_non_bac_firm.biom -n p__Bacteroidetes,p__Firmicutes"))
script_info['script_usage'].append(("","Filter otu_table.biom to include OTUs identified as p__Firmicutes but not c__Clostridia.","%prog -i otu_table.biom -o otu_table_all_firm_but_not_clos.biom -p p__Firmicutes -n c__Clostridia"))

script_info['output_description']= ""
script_info['required_options'] = [
 make_option('-i','--input_otu_table_fp',
             type="existing_filepath",
             help='the input otu table filepath'),
 make_option('-o','--output_otu_table_fp',
             type="new_filepath",
             help='the output otu table filepath'),
]
script_info['optional_options'] = [
 make_option('-p','--positive_taxa',
             help='comma-separated list of taxa to retain [default: None; retain all taxa]'),
 make_option('-n','--negative_taxa',
             help='comma-separated list of taxa to discard [default: None; retain all taxa]'),
 make_option('--metadata_field',default='taxonomy',
             help='observation metadata identifier to filter based on [default: %default]'),
]
script_info['version'] = __version__
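# Hedged sketch of the positive/negative taxonomy filter exercised above; the
# script itself builds its predicate with qiime.filter.get_otu_ids_from_taxonomy_f
# (imported above), so this only illustrates the keep/discard logic.
def keep_otu(lineage, positive_taxa=None, negative_taxa=None):
    lineage = set(lineage)
    if positive_taxa is not None and lineage.isdisjoint(positive_taxa):
        return False      # must contain at least one of the positive taxa
    if negative_taxa is not None and not lineage.isdisjoint(negative_taxa):
        return False      # must contain none of the negative taxa
    return True

lineage = ['k__Bacteria', 'p__Firmicutes', 'c__Clostridia']
print(keep_otu(lineage, positive_taxa={'p__Firmicutes'},
               negative_taxa={'c__Clostridia'}))   # False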
Example #30
script_info['script_usage'].append((
    """NMDS (Dimensions)""",
    """For this script, the user supplies a distance matrix (i.e. resulting file from beta_diversity.py), the number of dimensions of NMDS space and the output filename (e.g. beta_div_coords.txt), as follows:""",
    """%prog -i beta_div.txt -d 3 -o beta_div_3_coords.txt"""))
script_info['script_usage'].append((
    """NMDS (Multiple Files):""",
    """The script also functions in batch mode if a folder is supplied as input (e.g. from beta_diversity.py run in batch). No other files should be present in the input folder - only the distance matrix files to be analyzed. This script operates on every distance matrix file in the input directory and creates a corresponding nmds results file in the output directory, e.g.:""",
    """%prog -i beta_div_weighted_unifrac/ -o beta_div_weighted_nmds_results/"""
))
script_info[
    'output_description'] = """The resulting output file consists of the NMDS axes (columns) for each sample (rows). Pairs of NMDS axes can then be graphed to view the relationships between samples. The bottom of the output file contains the stress of the ordination."""
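# Illustrative sketch only (nmds.py does not use scikit-learn): non-metric MDS
# on a precomputed distance matrix yields per-sample NMDS axes plus a stress
# value, the two quantities the output description above mentions.
import numpy as np
from sklearn.manifold import MDS

distances = np.array([[0.0, 0.3, 0.7],
                      [0.3, 0.0, 0.6],
                      [0.7, 0.6, 0.0]])
nmds = MDS(n_components=2, metric=False, dissimilarity='precomputed',
           random_state=0)
coords = nmds.fit_transform(distances)
print(coords.shape, round(nmds.stress_, 4))   # (3, 2) and the final stress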
script_info['required_options']=[\

make_option('-i', '--input_path',type='existing_path',\
     help='path to the input distance matrix file(s) (i.e., the output from beta_diversity.py). Is a directory for batch processing and a filename for a single file operation.'),\

make_option('-o', '--output_path',type='new_path',
     help='output path. directory for batch processing, '+\
       'filename for single file operation'),\
]

script_info['optional_options']=[
    make_option('-d', '--dimensions', default=3,type='int',
     help='number of dimensions of NMDS space'+\
       ' [default: %default]'),
]
script_info['version'] = __version__


def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
Example #31
script_info["script_usage"].append(
    (
        """Write to standard out and edit mapping file:""",
        """Calculate statistics on an OTU table and add sequence/sample count data to mapping file.""",
        """%prog -i otu_table.biom -m Fasting_Map.txt -o map.txt""",
    )
)

script_info[
    "output_description"
] = """The resulting statistics are written to stdout. If -m is passed, a new mapping file is written to the path specified by -o, in addition to the statistics written to stdout"""
script_info["required_options"] = [options_lookup["otu_table_as_primary_input"]]
script_info["optional_options"] = [
    make_option(
        "-m",
        "--mapping_fp",
        type="existing_filepath",
        help='a mapping file. If included, this script will modify the mapping file to include sequences per sample (library) information, and write the modified mapping file to the path specified by -o. The sequences (individuals) per sample are presented in a new column entitled "NumIndividuals", and samples present in the mapping file but not the otu table have the value "na" in this column. Note also that the location of comments is not preserved in the new mapping file.',
    ),
    make_option(
        "-o",
        "--output_mapping_fp",
        help="the output filepath where the modified mapping file will be written",
        type="new_filepath",
    ),
    make_option(
        "--num_otus",
        action="store_true",
        help="Counts are presented as number of observed OTUs per sample, rather than counts of sequences per sample [default: %default]",
        default=False,
    ),
]
    """OTU tables of different depths""",
    """Build rarefied otu tables containing 10 (-m) to 140 (-x) sequences in steps of 10 (-s) with 2 (-n) repetitions per number of sequences, from otu_table.biom (-i). Write the output files to the rarefied_otu_tables directory (-o, will be created if it doesn't exist). The name of the output files will be of the form rarefaction_<num_seqs>_<repetition_number>.biom. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/).""",
    """%prog -o $PWD/rarefied_otu_tables/ -m 10 -x 140 -s 10 -n 2 -i $PWD/otu_table.biom"""
))

script_info['script_usage'].append((
    """OTU tables of the same depth""",
    """Build 8 rarefied otu tables each containing exactly 100 sequences per sample (even depth rarefaction). ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/).""",
    """%prog -o $PWD/even_otu_tables/ -m 100 -x 100 -n 8 -i $PWD/otu_table.biom"""
))

script_info[
    'output_description'] = """The result of parallel_multiple_rarefactions.py consists of a number of files, which depend on the minimum/maximum number of sequences per samples, steps and iterations. The files have the same otu table format as the input otu_table.biom, and are named in the following way: rarefaction_100_0.txt, where "100" corresponds to the sequences per sample and "0" for the iteration."""

script_info['required_options'] = [\
 make_option('-i', '--input_path',type='existing_filepath',
        help='input filepath (the otu table) [REQUIRED]'),\
 make_option('-o', '--output_path',type='new_dirpath',
        help="write output rarefied otu tables here; makes dir if it doesn't exist [REQUIRED]"),\
 make_option('-m', '--min', type=int,help='min seqs/sample [REQUIRED]'),\
 make_option('-x', '--max', type=int,\
                      help='max seqs/sample (inclusive) [REQUIRED]'),\

]
script_info['optional_options'] = [
    make_option(
        '-n',
        '--num-reps',
        dest='num_reps',
        default=10,
        type=int,
        help='num iterations at each seqs/sample level [default: %default]'),
     """If you want to insert sequences using pplacer, you can supply a fasta file containing query sequences (aligned to reference sequences) along with the reference alignment, a starting tree and the stats file produced when building the starting tree via pplacer as follows:""",
     """%prog -i aligned_query_seqs.fasta -r aligned_reference_seqs.fasta -t starting_tree.tre -o insertion_results -m parsinsert"""))
script_info['script_usage'].append(
    ("""Pplacer Example:""",
     """If you want to insert sequences using pplacer, you can supply a fasta file containing query sequences (aligned to reference sequences) along with the reference alignment, a starting tree and the stats file produced when building the starting tree via pplacer as follows:""",
     """%prog -i aligned_query_seqs.fasta -r aligned_reference_seqs.fasta -t starting_tree.tre -o insertion_results -m pplacer"""))
script_info['script_usage'].append(
    ("""Parameters file:""",
     """Additionally, users can supply a parameters file to change the options of the underlying tools as follows:""",
     """%prog -i aligned_query_seqs.fasta -r aligned_reference_seqs.fasta -t starting_tree.tre -o insertion_results -p raxml_parameters.txt"""))
script_info[
    'output_description'] = "This script produces a tree file (in Newick format) along with a log file containing the output from the underlying tool used for tree insertion."
script_info['required_options'] = [
    options_lookup['fasta_as_primary_input'],
    options_lookup['output_dir'],
    make_option('-t', '--starting_tree_fp',
                type='existing_filepath', help='Starting Tree which you would like to insert into.'),
    make_option('-r', '--refseq_fp',
                type='existing_filepath', dest='refseq_fp', help='Filepath for ' +
                'reference alignment'),
]
script_info['optional_options'] = [
    make_option('-m', '--insertion_method',
                type='choice', help='Method for aligning' +
                ' sequences. Valid choices are: ' +
                ', '.join(insertion_method_choices) + ' [default: %default]',
                choices=insertion_method_choices,
                default='raxml_v730'),
    make_option('-s', '--stats_fp',
                type='existing_filepath', help='Stats file produced by tree-building software. REQUIRED if -m pplacer [default: %default]'),
    make_option('-p', '--method_params_fp',
                type='existing_filepath', help="Parameters file containing method-specific parameters to use. Lines should be formatted as 'raxml:-m GTRCAT' (note this is not a standard QIIME parameters file, but a RAxML parameters file). [default: %default]"),
Example #34
centroids.fasta: The cluster representatives of each cluster

singletons.fasta: contains all unclustered reads

denoiser_mapping.txt: This file contains the actual clusters. The cluster centroid is given first,
                    the cluster members follow after the ':'.   

checkpoints/ : directory with checkpoints

Note that the centroid and singleton files are disjoint. For most downstream analyses, the two files should be concatenated (e.g., with cat).
"""

script_info['required_options']=[\

    make_option('-i','--input_file',action='store',\
                    type='string',dest='sff_fp',help='path to flowgram file. '+\
                    'Separate several files by commas '+\
                    '[REQUIRED]', default=None)
    ]

script_info['optional_options']=[ \

    make_option('-f','--fasta_fp',action='store',\
                    type='string',dest='fasta_fp',\
                    help='path to fasta input file. '+\
                    'Reads not in the fasta file are filtered out '+\
                    'before denoising. File format is as produced by '+\
                    'split_libraries.py '+\
                    '[default: %default]',\
                    default=None),

    make_option('-o','--output_dir',action='store',\
script_info['script_usage'] = [
    ("Job submission example",
     "Start each command listed in test_jobs.txt in parallel. The run ID for these jobs will be RUNID.",
     "%prog -ms test_jobs.txt RUNID"),
    ("Queue specification example",
     "Submit the commands listed in test_jobs.txt to the specified queue.",
     "%prog -ms test_jobs.txt -q friendlyq RUNID"),
    ("Jobs output directory specification example",
     "Submit the commands listed in test_jobs.txt, with the jobs put under the "
     "my_jobs/ directory.",
     "%prog -ms test_jobs.txt -j my_jobs/ RUNID")
]
script_info['output_description'] = "No output is created."
script_info['required_options'] = []
script_info['optional_options'] = [
    make_option('-m', '--make_jobs', action='store_true',
                help='make the job files [default: %default]'),

    make_option('-s', '--submit_jobs', action='store_true',
                help='submit the job files [default: %default]'),

    make_option('-q', '--queue',
                help='name of queue to submit to [default: %default]',
                default=qiime_config['torque_queue']),

    make_option('-j', '--job_dir',
                help='directory to store the jobs [default: %default]',
                default="jobs/"),

    make_option('-w', '--max_walltime', type='int',
                help='maximum time in hours the job will run for [default: %default]',
                default=72),
Example #36
     "-o single_plot.pdf"),
    ("multiple graph example", "Plot the rank-abundance curve of several "
     "samples:", "%prog -i otu_table.biom  -s "
     "'PC.354,PC.481,PC.636' -x -v -o "
     "multi_plot.pdf"),
    ("multiple graph example", "Plot the rank-abundance curve of all samples "
     "in an OTU table:", "%prog -i otu_table.biom  -s '*' -x -f eps "
     "-v -o all_plot.eps"),
]

script_info['output_description'] = ""

script_info['required_options'] = [
    options_lookup['otu_table_as_primary_input'],
    make_option('-s',
                '--sample_name',
                type='string',
                help='name of the sample to plot. Use "*" to plot all.'),
    make_option(
        '-o',
        '--result_fp',
        type='new_filepath',
        help='Path to store resulting figure file. File extension will be '
        'appended if not supplied (e.g.: rankfig -> rankfig.pdf). '
        'Additionally, a log file rankfig_log.txt will be created')
]

# could basically allow all of matplotlib format here
file_types = ['pdf', 'svg', 'png', 'eps']

script_info['optional_options'] = [
    make_option('-a',
Additionally, a table summary is generated by running the 'biom summarize-table' command (part of the biom-format package). To update parameters to this command, your parameters file should use 'biom-summarize-table' (without quotes) as the script name. See http://qiime.org/documentation/qiime_parameters_files.html for more details.
"""

script_info['script_usage'] = []

script_info['script_usage'].append(
    ("",
     "Run diversity analyses at 20 sequences/sample, with categorical analyses focusing on the SampleType and day categories. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/).",
     "%prog -i $PWD/otu_table.biom -o $PWD/core_output -m $PWD/map.txt -c SampleType,day -t $PWD/rep_set.tre -e 20"))

script_info['script_usage_output_to_remove'] = ['$PWD/core_output']

script_info['output_description'] = """"""

script_info['required_options'] = [
    make_option('-i', '--input_biom_fp', type='existing_filepath',
                help='the input biom file [REQUIRED]'),
    make_option('-o', '--output_dir', type='new_dirpath',
                help='the output directory [REQUIRED]'),
    make_option('-m', '--mapping_fp', type='existing_filepath',
                help='the mapping filepath [REQUIRED]'),
    make_option('-e', '--sampling_depth', type='int', default=None,
                help=('Sequencing depth to use for even sub-sampling and maximum'
                      ' rarefaction depth. You should review the output of the'
                      ' \'biom summarize-table\' command to decide on this value.')),
]

script_info['optional_options'] = [
    make_option('-p', '--parameter_fp', type='existing_filepath',
                help=('path to the parameter file, which specifies changes'
                      ' to the default behavior. For more information, see'
                      ' www.qiime.org/documentation/qiime_parameters_files.html'
options_lookup = get_options_lookup()

script_info = {}
script_info[
    'brief_description'] = """Checks a fasta file to verify if it has been properly demultiplexed, i.e., that it is in QIIME-compatible format."""
script_info[
    'script_description'] = """Checks that the file is a valid fasta file, does not contain gaps ('.' or '-' characters), contains only valid nucleotide characters, no fasta label is duplicated, SampleIDs match those in a provided mapping file, fasta labels are formatted to have SampleID_X as normally generated by QIIME demultiplexing, and the BarcodeSequence/LinkerPrimerSequences are not found in the fasta sequences.  Optionally this script can also verify that the SampleIDs in the fasta sequences are also present in the tip IDs of a provided newick tree file, can test for equal sequence lengths across all sequences, and can test that all SampleIDs in the mapping file are represented in the fasta file labels."""
script_info['script_usage'] = []
script_info['script_usage'].append(
    ("""Example:""", """ """,
     """ validate_demultiplexed_fasta.py -f seqs.fasta -m Mapping_File.txt"""))
script_info['output_description'] = """"""
script_info['required_options']=[\
   make_option('-m', '--mapping_fp', type='existing_filepath',
        help='name of mapping file. NOTE: Must contain a header'+\
         ' line indicating SampleID in the first column and'+\
         ' BarcodeSequence in the second,'+\
         ' LinkerPrimerSequence in the third.  If no barcode or '+\
          'linkerprimer sequence is present, leave data fields empty.'),

   options_lookup['fasta_as_primary_input']\
]
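
# A small sketch of a few of the checks listed in the script description above:
# labels should look like SampleID_X with a SampleID from the mapping file, and
# sequences should contain neither gaps nor non-nucleotide characters. The
# helper and the character set are illustrative assumptions, not the QIIME code.
import re

VALID_NUC_CHARS = set('ACGTUNRYSWKMBDHV')  # IUPAC nucleotide codes (assumed)

def basic_fasta_checks(label, seq, valid_sample_ids):
    errors = []
    match = re.match(r'^(\S+)_\d+', label)
    if match is None or match.group(1) not in valid_sample_ids:
        errors.append('label is not SampleID_X or SampleID not in mapping file')
    if '.' in seq or '-' in seq:
        errors.append('sequence contains gap characters')
    if set(seq.upper()) - VALID_NUC_CHARS - set('.-'):
        errors.append('sequence contains invalid characters')
    return errors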
script_info['optional_options']=[\
    make_option('-o', '--output_dir', default='.',type='new_dirpath',
        help='directory prefix for output files [default: %default]'),
    make_option('-t','--tree_fp', default=None,type='existing_filepath',
        help='path to the tree file; '+\
         'Needed to test if sequence IDs are a subset or exact match to the '+\
         'tree tips, options -s and -e  [default: %default]'),
    make_option('-s', '--tree_subset', default=False, action='store_true',
        help='Determine if sequence IDs are a subset of the tree tips, '+\
         'newick tree must be passed with the -t option. [default: %default]'),
default_reference_seqs_fp = qiime_config['assign_taxonomy_reference_seqs_fp']
default_id_to_taxonomy_fp = qiime_config['assign_taxonomy_id_to_taxonomy_fp']

script_info={}

script_info['brief_description']="""Parallel taxonomy assignment using BLAST"""
script_info['script_description']="""This script performs like the assign_taxonomy.py script, but is intended to make use of multicore/multiprocessor environments to perform analyses in parallel."""

script_info['script_usage']=[]
script_info['script_usage'].append(("""Example""","""Assign taxonomy to all sequences in the input file (-i) using BLAST with the id to taxonomy mapping file (-t) and reference sequences file (-r), and write the results (-o) to $PWD/blast_assigned_taxonomy/. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/).""","""%prog -i $PWD/inseqs.fasta -t $PWD/id_to_tax.txt -r $PWD/refseqs.fasta -o $PWD/blast_assigned_taxonomy/"""))

script_info['output_description']="""Mapping of sequence identifiers to taxonomy and quality scores."""

script_info['required_options'] = [
 make_option('-i','--input_fasta_fp',
           type='existing_filepath',help='full path to '+\
           'the input fasta file [REQUIRED]'),
 make_option('-o','--output_dir',action='store',
           type='new_dirpath',help='full path to store output files '+\
           '[REQUIRED]')
]

script_info['optional_options'] = [\
 make_option('-r','--reference_seqs_fp',type='existing_filepath',\
        help='Ref seqs to blast against.  Must provide either --blast_db or '
        '--reference_seqs_fp for assignment with blast [default: %s]' \
        % default_reference_seqs_fp,
        default=default_reference_seqs_fp),\
 make_option('-b', '--blast_db',
        help='Database to blast against.  Must provide either --blast_db or '
        '--reference_seqs_fp for assignment with blast [default: %default]'),\
Example #40
     "output files will be written to the directory specified by -o, and "
     "subdirectories as appropriate. ALWAYS SPECIFY ABSOLUTE FILE PATHS "
     "(absolute path represented here as $PWD, but will generally look "
     "something like /home/ubuntu/my_analysis/). ",
     "%prog -i $PWD/seqs.fna -o $PWD/swarm_otus/ -p "
     "$PWD/swarm_params.txt"))

script_info['script_usage_output_to_remove'] = ['$PWD/uclust_otus/',
                                                '$PWD/sumaclust_otus/',
                                                '$PWD/swarm_otus/']

script_info[
    'output_description'] = """This script will produce an OTU mapping file (pick_otus.py), a representative set of sequences (FASTA file from pick_rep_set.py), a sequence alignment file (FASTA file from align_seqs.py), a taxonomy assignment file (from assign_taxonomy.py), a filtered sequence alignment (from filter_alignment.py), a phylogenetic tree (Newick file from make_phylogeny.py) and a biom-formatted OTU table (from make_otu_table.py)."""

script_info['required_options'] = [
    make_option('-i', '--input_fp', type='existing_filepath',
                help='the input fasta file [REQUIRED]'),
    make_option('-o', '--output_dir', type='new_dirpath',
                help='the output directory [REQUIRED]'),
]

script_info['optional_options'] = [
    make_option('-p', '--parameter_fp', type='existing_filepath',
                help='path to the parameter file, which specifies changes' +
                ' to the default behavior. ' +
                'See http://www.qiime.org/documentation/file_formats.html#qiime-parameters .' +
                ' [if omitted, default values will be used]'),
    make_option('-f', '--force', action='store_true',
                dest='force', help='Force overwrite of existing output directory' +
                ' (note: existing files in output_dir will not be removed)' +
                ' [default: %default]'),
    make_option('-w', '--print_only', action='store_true',
script_info = {}
script_info['brief_description'] = """Parallel pick otus using uclust_ref"""
script_info[
    'script_description'] = """This script works like the pick_otus.py script, but is intended to make use of multicore/multiprocessor environments to perform analyses in parallel."""
script_info['script_usage'] = []
script_info['script_usage'].append(
    ("""Example""",
     """Pick OTUs by searching $PWD/inseqs.fasta against $PWD/refseqs.fasta with reference-based uclust and write the output to the $PWD/blast_otus/ directory. This is a closed-reference OTU picking process. ALWAYS SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will generally look something like /home/ubuntu/my_analysis/).""",
     """%prog -i $PWD/seqs.fna -r $PWD/refseqs.fna -o $PWD/ucref_otus/"""))
script_info[
    'output_description'] = """The output consists of two files (i.e. seqs_otus.txt and seqs_otus.log). The .txt file is composed of tab-delimited lines, where the first field on each line corresponds to an OTU identifier which is the reference sequence identifier, and the remaining fields correspond to sequence identifiers assigned to that OTU. The resulting .log file contains a list of parameters passed to this script along with the output location of the resulting .txt file."""

script_info['required_options'] = [
    make_option('-i', '--input_fasta_fp', action='store',
                type='existing_filepath', help='full path to ' +
                'the input fasta file'),

    make_option('-o', '--output_dir', action='store',
                type='new_dirpath', help='path to store output files'),

    make_option('-r', '--refseqs_fp', action='store',
                type='existing_filepath', help='full path to ' +
                'reference collection')
]

script_info['optional_options'] = [

    make_option('-s', '--similarity', action='store',
                type='float', help='Sequence similarity ' +
                'threshold [default: %default]', default=0.97),
Example #42
subject id in the mapping file and processes the study to be added to Evident."""
script_info['script_usage'] = [("Process a new study","""To process a new study you need \
to download a study from the database (biom and mapping file), select a rarefaction level \
to perform the analyses, and define the column in the mapping file that has the unique \
identifier of the subjects (for example: HOST_SUBJECT_ID) and then run the following \
command:""", """%prog -i otu_table.biom -m mapping_file.txt -o processed_study -e 1000 \
-s HOST_SUBJECTY""")]
script_info['script_usage'].append(("Process a new study in parallel","""To process a \
study in parallel, using 10 jobs, you can use this command:""", """%prog -i \
otu_table.biom -m mapping_file.txt -o processed_study -e 1000 -s HOST_SUBJECTY -aO 10"""))
script_info['output_description']="""The script creates a raw.biom (original file), \
an evenly sampled biom file, a selectors.txt file that contains information about how Evident \
should behave in the main GUI, a cleaned mapping file, a study_preference file that has \
some basic information about the study, and alpha & beta calculations. """
script_info['required_options'] = [\
 make_option('-i','--otu_table_fp',type='existing_filepath',
            help='the input biom table [REQUIRED]'),
 make_option('-m','--mapping_fp',type='existing_filepath',
            help='path to the mapping file [REQUIRED]'),
 make_option('-o','--output_dir',type='new_dirpath',
            help='the output directory [REQUIRED]'),
 make_option('-e','--seqs_per_sample',type="int",help='the number of sequences per sample (even sampling/rarefaction depth)'),\
 make_option('-s','--subject_name',help='the name of the subject category in the ' + 
            'mapping file, i.e. "INDIVIDUAL", "HOST_INDIVIDUAL"'),\
]
script_info['optional_options'] = [\
 make_option('-t','--tree_fp',type='existing_filepath',
        help='path to the tree file [default: %default]',
        default="/evident/data/gg_97_otus_4feb2011.tre"),
 make_option('-p','--parameter_fp',type='existing_filepath',
    help='path to the parameter file, which specifies changes to the default behavior. '\
        'See http://www.qiime.org/documentation/file_formats.html#qiime-parameters .'\
Example #43
    'script_description'] = """A simple script that prints out the qiime config settings and does some sanity checks."""
script_info['script_usage'] = []
script_info['script_usage'].append(
    ("Example 1", """Print qiime config settings:""", """print_qiime_config.py"""))
script_info['script_usage'].append(
    ("Example 2", """Print and check qiime config settings for sanity:""",
     """print_qiime_config.py -t"""))

script_info[
    'output_description'] = """This prints the qiime_config to stdout."""
script_info['version'] = __version__
script_info['help_on_no_arguments'] = False
script_info['required_options'] = []
script_info['optional_options'] = [
    make_option('-t', '--test',
                action='store_true',
                default=False,
                help='Test the QIIME install and configuration [default: %default]'),
    make_option('-b',
                '--qiime_base_install',
                action='store_true',
                default=False,
                help='If passed, report only on dependencies required for the QIIME base install [default: %default]'),
    make_option('--haiku',
                action='store_true',
                default=False,
                help=SUPPRESS_HELP)
]


class QIIMEConfig(TestCase):
Example #44
)]

script_info['output_description'] = """
prefix_dereplicated.sff.txt: human readable sff file containing the flowgram of the
                             cluster representative of each cluster.

prefix_dereplicated.fasta: Fasta file containing the cluster representative of each cluster.

prefix_mapping.txt: This file contains the actual clusters. The cluster centroid is given first,
                    the cluster members follow after the ':'.
"""

script_info['required_options'] = [
    make_option('-i',
                '--input_files',
                action='store',
                type='existing_filepaths',
                dest='sff_fps',
                help='path to flowgram files (.sff.txt), ' + 'comma separated')
]

script_info['optional_options'] = [
    make_option('-f',
                '--fasta_file',
                action='store',
                type='string',
                dest='fasta_fp',
                help='path to fasta input file ' + '[default: %default]',
                default=None),
    make_option('-s',
                '--squeeze',
                action='store_true',
Example #45
This script transforms a series of files, named (e.g. alpha_rarefaction_20_0.txt, alpha_rarefaction_20_1.txt, etc.) into a (usually much smaller) set of files named (e.g. chao1.txt, PD_whole_tree.txt, etc.), where the columns correspond to samples and rows to the rarefaction files inputted, as shown by the following:

==========================  ====================    =========   ======  ======
\                           sequences per sample    iteration   PC.354  PC.355
==========================  ====================    =========   ======  ======
alpha_rarefaction_20_0.txt  20                      0           0.925   0.915 
alpha_rarefaction_20_1.txt  20                      1           0.9     0.89 
alpha_rarefaction_20_2.txt  20                      2           0.88    0.915 
alpha_rarefaction_20_3.txt  20                      3           0.91    0.93 
...                         ...                     ...         ...     ...
==========================  ====================    =========   ======  ====== 

"""
script_info['required_options']=[\
 make_option('-i', '--input_path', type='existing_path',
 help='input path (a directory)'),
 make_option('-o', '--output_path', type='new_dirpath',
 help='output path (a directory); will be created if needed')
]

script_info['optional_options']=[\
 make_option('-e', '--example_path',type='existing_filepath',
 help='example alpha_diversity analysis file, containing all samples'+\
 ' and all metrics to be included in the collated result '+\
 '[Default: chosen automatically (see usage string)]')
]
script_info['version'] = __version__

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    
Example #46
    """""",\
    """%prog -i rep_seqs.tre -t tips_to_keep.txt -o negated.tre -n"""))
script_info['script_usage'].append(("""Prune a tree to include only the tips found in the fasta file provided""",\
    """""",\
    """%prog -i rep_seqs.tre -f fast_f.fna -o pruned_fast.tre"""))
script_info['output_description'] = \
    """Output is a pruned tree in newick format."""

script_info['required_options']=[\
 make_option('-i',
              '--input_tree_filepath',
              action='store',
              type='existing_filepath',
              dest='input_tree_fp',
              help='input tree filepath'),

make_option('-o',
            '--output_tree_filepath',
            action='store',
            type='new_filepath',
            dest='output_tree_fp',
            help='output tree filepath'),\
]

script_info['optional_options']=[\
 make_option('-n',
              '--negate',
              default=False,
              action='store_true',
              help='if negate is True will remove input tips/seqs, if '
                   'negate is False, will retain input tips/seqs [default: %default]'),
Example #47
from cogent.parse.fasta import MinimalFastaParser
from qiime.util import qiime_blast_seqs

options_lookup = get_options_lookup()

#blast_wrapper.py
script_info={}
script_info['brief_description']="""Blast Interface"""
script_info['script_description']="""This script is a functionally-limited interface to the qiime.util.qiime_blast_seqs function, primarily useful for testing purposes. Once that function has been integrated into qiime as the primary blast interface it will move to PyCogent. An expanded version of this command line interface may replace the script functionality of cogent.app.blast at that point."""
script_info['script_usage']=[]
script_info['script_usage'].append(("""Example:""","""Blast all sequences in inseqs.fasta (-i) against a BLAST db constructed \
from refseqs.fasta (-r).""","""%prog -i inseqs.fasta -r refseqs.fasta"""))
script_info['output_description']="""This is a utility program, which returns BLAST results."""
script_info['required_options']=[\
options_lookup['fasta_as_primary_input'],
make_option('-r','--refseqs_fp',\
    help='path to blast database as a fasta file')
]
script_info['optional_options']=[\
make_option('-n','--num_seqs_per_blast_run', type='int', default=1000, \
help = 'number of sequences passed to each blast call '+\
"- useful for very large sequence collections [default: %default]")
]

script_info['version'] = __version__

def main():
    option_parser, options, args = parse_command_line_parameters(**script_info)
    
    blast_results = qiime_blast_seqs(\
     seqs=MinimalFastaParser(open(options.input_fasta_fp)),\
Example #48
script_info[
    'script_description'] = """Compares jackknifed/bootstrapped trees (support trees) with a master tree typically constructed from the entire dataset (e.g.: a resulting file from upgma_cluster.py) and outputs support for nodes.

if support trees do not have all tips that master has (e.g. because samples with few sequences were dropped during a jackknifing analysis), the output master tree will have only those tips included in all support trees

if support trees have tips that the master tree does not, those tips will be ignored (removed from the support tree during analysis)"""
script_info['script_usage'] = []
script_info['script_usage'].append((
    """Example:""",
    """Given the sample upgma tree generated by the user for the entire dataset, the directory of bootstrap/jackknife supported trees (e.g.: the resulting directory from upgma_cluster.py) and the directory to write the results for the tree comparisons, the following command compares the support trees with the master:""",
    """%prog -m input_master_tree.tre -s bootstrapped_trees/ -o output/"""))
script_info[
    'output_description'] = """The result of tree_compare.py contains the master tree, now with internal nodes uniquely named, a separate bootstrap/jackknife support file, listing the support for each internal node, and a jackknife_named_nodes.tre tree, where internal nodes are named with their support values from 0 to 1.0, for use with tree visualization software (e.g. FigTree)."""
script_info['required_options'] = [
    make_option('-m',
                '--master_tree',
                type='existing_filepath',
                help='master tree filepath'),
    make_option('-s',
                '--support_dir',
                type='existing_path',
                help='path to dir containing support trees'),
    make_option('-o',
                '--output_dir',
                type='new_dirpath',
                help='output directory, writes three files here; ' +
                "makes dir if it doesn't exist")
]
script_info['optional_options'] = []
script_info['version'] = __version__

Example #49
script_info={}
script_info['brief_description']="""Get the reverse complement of all sequences"""
script_info['script_description']="""Write the reverse complement of all seqs in seqs.fasta (-i) to seqs_rc.fasta (default; change output_fp with -o). Each sequence description line will have ' RC' appended to the end of it (default;
leave sequence description lines untouched by passing -r)."""
script_info['script_usage']=[]
script_info['script_usage'].append(("""Example:""",
"""Reverse complement all sequences in seqs.fna and write result to seqs_rc.fna""",
"""%prog -i seqs.fna"""))
script_info['output_description']=""""""
script_info['required_options']=[\
   options_lookup['fasta_as_primary_input']\
]
script_info['optional_options']=[\
   options_lookup['output_fp'],\
   make_option('-r','--retain_seq_id',action='store_true',\
        help='leave seq description lines untouched'+\
        ' [default: append " RC" to seq description lines]')
] 
script_info['version'] = __version__
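
# A minimal sketch of the operation described above: reverse-complement a
# nucleotide sequence and append ' RC' to its description line. rc and the
# complement table are illustrative, not the implementation this script uses.
_COMPLEMENT = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G', 'N': 'N'}

def rc(label, seq, retain_seq_id=False):
    rc_seq = ''.join(_COMPLEMENT.get(b, 'N') for b in reversed(seq.upper()))
    rc_label = label if retain_seq_id else label + ' RC'
    return rc_label, rc_seq

# rc('seq1 some description', 'ATGC') -> ('seq1 some description RC', 'GCAT')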


def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
      
    verbose = opts.verbose
    
    input_fasta_fp = opts.input_fasta_fp
    output_fp = opts.output_fp
    retain_seq_id = opts.retain_seq_id
    
    if retain_seq_id:
Example #50
script_info = {}
script_info['brief_description'] = "Given a directory of per-swath qseq files,\
 this script generates a single fastq per lane."

script_info['script_description'] = ""
script_info['script_usage'] = [
    ("", "Generate fastq files from all lanes of read 1 data in the current\
 directory.", "process_qseq.py -i ./ -o ./fastq/ -r 1"),
    ("", "Generate fastq files from all lanes of read 2 data in the current\
 directory, truncating the sequences after the first 12 bases.",
     "process_qseq.py -i ./ -o ./fastq/ -r 2 -b 12")
]
script_info['output_description'] = ""
script_info['required_options'] = [
    make_option('-i',
                '--input_dir',
                type='existing_dirpath',
                help='the input directory'),
    make_option('-o',
                '--output_dir',
                type='new_dirpath',
                help='the output directory'),
    make_option('-r', '--read', help='the read number to consider', type='int')
]
script_info['optional_options'] = [
    make_option(
        '-l',
        '--lanes',
        type='string',
        help='the lane numbers to consider, comma-separated [default: %default]',
        default='1,2,3,4,5,6,7,8'),
    make_option(
Example #51
    'brief_description'] = "Script for sorting the sample IDs in an OTU table based on a specified value in a mapping file."
script_info['script_description'] = ""
script_info['script_usage'] = [("Default",
                                "case-insensitive natural sorting,"
                                " i.e. SAMP0, samp1, SAMP2, samp10, samp12",
                                "%prog -i otu_table.biom -o sorted_otu_table.biom"),
                               ("",
                                "sort samples by the age field in the mapping file",
                                "%prog -i otu_table.biom -o dob_sorted_otu_table.biom -m Fasting_Map.txt -s DOB"),
                               ("",
                                "sort samples based on order in a file where each line starts with a sample id",
                                "%prog -i otu_table.biom -o sorted_otu_table.biom -l sample_id_list.txt")]
script_info['output_description'] = ""
script_info['required_options'] = [
    make_option('-i', '--input_otu_table',
                help='Input OTU table filepath in BIOM format.',
                type='existing_filepath'),
    make_option('-o', '--output_fp',
                help='Output OTU table filepath.',
                type='new_filepath'),
]
script_info['optional_options'] = [
    make_option('-m', '--mapping_fp',
                help='Input metadata mapping filepath. [default: %default]',
                type='existing_filepath'),
    make_option('-s', '--sort_field', type='string',
                help='Category to sort OTU table by. [default: %default]'),
    make_option('-l', '--sorted_sample_ids_fp',
                help='Sorted sample id filepath [default: %default]',
                type='existing_filepath')
]
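
# Sketch of the "case-insensitive natural sorting" used as the default above
# (SAMP0, samp1, SAMP2, samp10, samp12); natural_sort_key is an illustrative
# helper, not the function used internally.
import re

def natural_sort_key(sample_id):
    parts = re.split(r'(\d+)', sample_id.lower())
    return [int(p) if p.isdigit() else p for p in parts]

# sorted(['samp10', 'SAMP2', 'samp1', 'samp12', 'SAMP0'], key=natural_sort_key)
# -> ['SAMP0', 'samp1', 'SAMP2', 'samp10', 'samp12']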
    'script_description'] = (
        "This script provides a convenient interface for merging mapping "
        "files which contain data on different samples.")
script_info['script_usage'] = []
script_info['script_usage'].append(
    ("Example:",
     "Merge two mapping files into a new mapping file (merged_mapping.txt). "
     "In cases where a mapping field is not provided for some samples, add "
     "the value 'Data not collected'.",
     "%prog -m map_controls.txt,map_fasting.txt -o merged_mapping.txt -n "
     "'Data not collected'"))
script_info[
    'output_description'] = (
        "The result of this script is a merged mapping file (tab-delimited).")
script_info['required_options'] = [
    make_option('-m', '--mapping_fps', type='existing_filepaths',
                help='the input mapping files in a comma-separated list'),
    make_option('-o', '--output_fp', type='new_filepath',
                help='the output mapping file to write'),
]

script_info['optional_options'] = [
    make_option('-n', '--no_data_value',
                help='value to represent missing data (i.e., when all '
                'fields are not defined in all mapping files) [default: '
                '%default]', default='no_data'),
    make_option('--case_insensitive', action='store_true', default=False,
                help='if present, the headers will be merged case-insensitively '
                'and transformed to upper case [default: %default]')
]
script_info['version'] = __version__
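
# Sketch of the merge behavior described above: take the union of the headers
# across all mapping files and fill fields missing for a sample with the
# -n/--no_data_value. merge_mapping is an illustrative helper, not the QIIME
# implementation.
def merge_mapping(mapping_dicts, no_data_value='no_data'):
    """mapping_dicts: one {sample_id: {field: value}} dict per mapping file."""
    all_fields = []
    for mapping in mapping_dicts:
        for fields in mapping.values():
            for f in fields:
                if f not in all_fields:
                    all_fields.append(f)
    merged = {}
    for mapping in mapping_dicts:
        for sample_id, fields in mapping.items():
            merged[sample_id] = dict((f, fields.get(f, no_data_value))
                                     for f in all_fields)
    return all_fields, merged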
)

script_info[
    "output_description"
] = """
The result of this script will be a folder containing images and/or an HTML
file (with appropriate javascript files), depending on the user-defined
parameters.
"""

script_info["required_options"] = [
    make_option(
        "-d",
        "--distance_matrix_file",
        help="Input distance matrix filepath (i.e. the result of "
        "beta_diversity.py). WARNING: Only symmetric, hollow distance "
        "matrices may be used as input. Asymmetric distance matrices, such as "
        "those obtained by the UniFrac Gain metric (i.e. beta_diversity.py "
        "-m unifrac_g), should not be used as input",
        type="existing_filepath",
    ),
    make_option(
        "-m", "--map_fname", dest="map_fname", help="Input metadata mapping filepath.", type="existing_filepath"
    ),
]

script_info["optional_options"] = [
    make_option(
        "-p",
        "--prefs_path",
        help="Input user-generated preferences filepath. NOTE: This is a "
        "file with a dictionary containing preferences for the analysis. "
Example #54
script_info['script_usage']=[\
    ("""Example:""",
     """Denoise flowgrams in file 454Reads.sff.txt, discard flowgrams not in seqs.fna, and extract primer from map.txt:""",
     """%prog -i 454Reads.sff.txt -f seqs.fna -m map.txt"""),

    ("""Multi-core Example:""",
     """Denoise flowgrams in file 454Reads.sff.txt using 2 cores on your machine in parallel:""",
     """%prog -n 2 -i 454Reads.sff.txt -f seqs.fna -m map.txt""")
    ]

script_info[
    'output_description'] = """This script results in an OTU-like mapping file along with a FASTA-format sequence file of the denoised sequences. Note that the sequences coming from denoising are not real OTUs, and have to be sent to pick_otus.py if the user wishes to have a defined similarity threshold."""

script_info['required_options'] = [\
    make_option('-i','--input_file', action='store',
                type='existing_filepaths', dest='sff_fps',
                help='path to flowgram files (.sff.txt), '+
                'comma separated'),

    make_option('-f','--fasta_file', action='store',
                type='existing_filepath', dest='fasta_fp',
                help='path to fasta file from split_libraries.py')
    ]

script_info['optional_options'] = [\
    make_option('-o','--output_dir', action='store',
                type='new_dirpath', dest='output_dir',
                help='path to output directory '+
                '[default: %default]',
                default="denoised_seqs/"),

    make_option('-n','--num_cpus', action='store',
Example #55
 parallel."""
script_info['script_usage']=[]
script_info['script_usage'].append(
("""Example""",
"""BLAST $PWD/inseqs.fasta (-i) against a blast database created from\
 $PWD/refseqs.fasta (-r). Store the results in $PWD/blast_out/ (-o). ALWAYS\
 SPECIFY ABSOLUTE FILE PATHS (absolute path represented here as $PWD, but will\
 generally look something like /home/ubuntu/my_analysis/).""",
"""%prog -i $PWD/inseqs.fasta -r $PWD/refseqs.fasta -o $PWD/blast_out/\
 -e 0.001"""))


script_info['output_description']=""" """
script_info['required_options'] = [
 make_option('-i','--infile_path',action='store',
          type='existing_filepath',dest='infile_path',
          help='Path of sequences to use as queries [REQUIRED]'),
 make_option('-o', '--output_dir',type='new_dirpath',
        help='name of output directory for blast jobs [REQUIRED]')
]
script_info['optional_options'] = [\
 make_option('-c','--disable_low_complexity_filter',
        default=False,action='store_true',
        help='disable filtering of low-complexity sequences '
             '(i.e., -F F is passed to blast) [default: %default]'),\
 make_option('-e','--e_value',action='store',\
        type='float', default=1e-30, dest='e_value',
        help='E-value threshold for blasts [default: %default]'),\
 make_option('-n','--num_hits',action='store',\
        type='int', default=1, dest='num_hits',
        help='number of hits per query for blast results [default: %default]'),\
Example #56
script_info = {}
script_info['brief_description'] = """Blast Interface"""
script_info[
    'script_description'] = """This script is a functionally-limited interface to the qiime.util.qiime_blast_seqs function, primarily useful for testing purposes. Once that function has been integrated into qiime as the primary blast interface it will move to PyCogent. An expanded version of this command line interface may replace the script functionality of cogent.app.blast at that point."""
script_info['script_usage'] = []
script_info['script_usage'].append((
    """Example:""",
    """Blast all sequences in inseqs.fasta (-i) against a BLAST db constructed \
from refseqs.fasta (-r).""",
    """%prog -i $PWD/inseqs.fasta -r $PWD/refseqs.fasta"""))
script_info[
    'output_description'] = """This is a utility program, which returns BLAST results."""
script_info['required_options'] = [
    options_lookup['fasta_as_primary_input'],
    make_option('-r',
                '--refseqs_fp',
                type='string',
                help='path to blast database as a fasta file')
]
script_info['optional_options'] = [
    make_option(
        '-n',
        '--num_seqs_per_blast_run',
        type='int',
        default=1000,
        help='number of sequences passed to each blast call ' +
        "- useful for very large sequence collections [default: %default]")
]

script_info['version'] = __version__

reinstated). These OTU maps can then be used to filter in the input FASTA file. 

Output file naming:
contamination_summary.txt -- tab-delimited per-sequence summary file
passed_otu_map.txt -- OTU map of non-contaminant sequences
ref_contaminants_otu_map.txt -- OTU map of reference contaminant sequences
abund_contaminants_otu_map.txt -- OTU map of abundance contaminant sequences
reinstated_contaminants_otu_map.txt -- OTU map of reinstated sequences
"""
script_info['required_options'] = [
    options_lookup["output_dir"]
    ]
script_info['optional_options'] = [
    options_lookup["otu_table_as_primary_input"],
    make_option('--mothur_counts_fp',
                type='existing_filepath',
                help='path to mothur counts table as input'),
    options_lookup["mapping_fp"],
    make_option('-M', '--otu_map_fp', type="existing_filepath",
                 help='the input OTU map file'),
    make_option('-s',
                '--valid_states', type='string',
                help="Column header:value pair in mapping file identifying blank samples"),
    make_option('--blank_id_fp',
                type='existing_filepath',
                help='path to file listing blank sample ids'),
    options_lookup["input_fasta"],
    make_option('--contaminant_db_fp', type="existing_filepath",
              help='A FASTA file of potential contaminant sequences'),
    make_option('-c', '--contaminant_similarity', type='float', default=0.97,
                help=('Sequence similarity threshold for contaminant matches')),
from qiime.filter import (sample_ids_from_metadata_description, 
                          filter_samples_from_otu_table,
                          filter_mapping_file)
from qiime.format import format_mapping_file, format_biom_table

script_info = {}
script_info['brief_description'] = "Filters samples from an OTU table on the basis of the number of observations in that sample, or on the basis of sample metadata. Mapping file can also be filtered to the resulting set of sample ids."
script_info['script_description'] = ""
script_info['script_usage'] = [("Abundance filtering (low coverage)","Filter samples with fewer than 150 observations from the otu table.","%prog -i otu_table.biom -o otu_table_no_low_coverage_samples.biom -n 150"),
("Abundance filtering (high coverage)","Filter samples with greater than 149 observations from the otu table.","%prog -i otu_table.biom -o otu_table_no_high_coverage_samples.biom -x 149"),
("Metadata-based filtering (positive)","Filter samples from the table, keeping samples where the value for 'Treatment' in the mapping file is 'Control'","%prog -i otu_table.biom -o otu_table_control_only.biom -m map.txt -s 'Treatment:Control'"),
("Metadata-based filtering (negative)","Filter samples from the table, keeping samples where the value for 'Treatment' in the mapping file is not 'Control'","%prog -i otu_table.biom -o otu_table_not_control.biom -m map.txt -s 'Treatment:*,!Control'"),
("List-based filtering","Filter samples where the id is listed in samples_to_keep.txt","%prog -i otu_table.biom -o otu_table_samples_to_keep.biom --sample_id_fp samples_to_keep.txt")]
script_info['output_description']= ""
script_info['required_options'] = [
 make_option('-i','--input_fp',type="existing_filepath",
             help='the input otu table filepath in biom format'),
 make_option('-o','--output_fp',type="new_filepath",
             help='the output filepath in biom format'),
]
script_info['optional_options'] = [
 make_option('-m',
             '--mapping_fp',
             type='existing_filepath',
             help='path to the map file [default: %default]'),
 make_option('--output_mapping_fp',
             type='new_filepath',
             help='path to write filtered mapping file [default: filtered mapping file is not written]'),
 make_option('--sample_id_fp',
             type='existing_filepath',
             help='path to file listing sample ids to keep [default: %default]'),
 make_option('-s',
Example #59
                                    "Alternatively, you can set the plot background color with '-k'",
                                    """%prog -i alpha_div_collated/ -m Fasting_Map.txt -k black"""))

script_info[
    'script_usage'].append(("""Generate raw data without interactive webpages:""",
                            "The user can choose to not create an interactive webpage ('-w' option). "
                            "This is for the case where the user just wants the average plots and the "
                            "raw average data.",
                            """%prog -i alpha_div_collated/ -m Fasting_Map.txt -w"""))


script_info[
    'output_description'] = """This script produces a folder; within that folder there is a sub-folder containing image files. Within the main folder, there is an HTML file."""
script_info['required_options'] = [
    make_option('-i', '--input_dir',
                help='Input directory containing results from collate_alpha.py.' +
                ' [REQUIRED]',
                type='existing_dirpath'),
    make_option('-m', '--map_fname',
                help='Input metadata mapping filepath. [REQUIRED]',
                type='existing_filepath')
]
script_info['optional_options'] = [
    make_option('-b', '--colorby', dest='colorby', type='string',
                help='Comma-separated list of metadata categories' +
                ' (column headers) ' +
                'to color by in the plots. The categories must match the name of a ' +
                'column header in the mapping file exactly. Multiple categories ' +
                'can be listed by comma-separating them without spaces. The user can ' +
                'also combine columns in the mapping file by separating the ' +
                'categories by "&&" without spaces. [default=color by all]'),
    make_option('-p', '--prefs_path',
    """%prog -i $PWD/inseqs_aligned.fasta -o $PWD/chimera_slayer_chimeric_seqs.txt"""
))

script_info[
    'output_description'] = """The result of parallel_identify_chimeric_seqs.py is a text file that identifies which sequences are chimeric."""

script_info['required_options'] = [
    options_lookup['fasta_as_primary_input'],
]

chimera_detection_method_choices = ['blast_fragments', 'ChimeraSlayer']

script_info['optional_options'] = [
    make_option('-a',
                '--aligned_reference_seqs_fp',
                type='existing_filepath',
                default=qiime_config['pynast_template_alignment_fp'],
                help='Path to (Py)Nast aligned reference sequences. '
                'REQUIRED when method is ChimeraSlayer [default: %default]'),
    make_option(
        '-t',
        '--id_to_taxonomy_fp',
        type='existing_filepath',
        help='Path to tab-delimited file mapping sequences to assigned '
        'taxonomy. Each assigned taxonomy is provided as a comma-separated '
        'list. [default: %default; REQUIRED when method is blast_fragments]'),
    make_option(
        '-r',
        '--reference_seqs_fp',
        type='existing_filepath',
        help=
        'Path to reference sequences (used to build a blast db when method is blast_fragments). '