Esempio n. 1
0
def primitive_unions():
    """Yield ParamTemplate cases whose type expression is a primitive union.

    The third ParamTemplate argument is ``int`` when every member of the
    union is an Int and ``object`` when different primitive kinds are mixed.
    The trailing tuple holds sample values for the case (presumably values
    the expression should accept -- confirm against ParamTemplate's users).
    """
    # Two non-overlapping ranges combined under a single Int.
    two_int_ranges = Int % (Range(5, 10) | Range(15, 20))
    yield ParamTemplate('disjoint', two_int_ranges, int, (5, 9, 15, 19))

    # An integer range or the literal string 'auto'.
    int_or_auto = Int % Range(1, None) | Str % Choices('auto')
    yield ParamTemplate('auto_int', int_or_auto, object, (1, 10, 'auto'))

    # Five-member union mixing floats, ints, strings and booleans.
    everything = (Float % Range(0, 1)
                  | Int
                  | Str % Choices('auto', 'Beef')
                  | Bool
                  | Float % Range(10, 11))
    yield ParamTemplate('kitchen_sink', everything, object,
                        (0.5, 1000, 'Beef', 'auto', True, False, 10.103))
Esempio n. 2
0
 def test_primitive_predicate(self):
     """Round-trip primitive types refined by Range or Choices predicates."""
     roundtrip = self.assert_roundtrip

     # Int with a single range and with a union of two ranges.
     roundtrip(Int % Range(0, 10))
     two_ranges = Range(0, 10) | Range(50, 100, inclusive_end=True)
     roundtrip(Int % two_ranges)

     # Float ranges with one bound given as None.
     roundtrip(Float % Range(None, 10))
     roundtrip(Float % Range(0, None))

     # Str choices, passed both as separate arguments and as a single list.
     roundtrip(Str % Choices("A"))
     roundtrip(Str % Choices(["A"]))
     roundtrip(Str % Choices("A", "B"))
     roundtrip(Str % Choices(["A", "B"]))

     # Bool restricted to a single value.
     roundtrip(Bool % Choices(True))
     roundtrip(Bool % Choices(False))
Esempio n. 3
0
def int_params():
    """Yield ParamTemplate cases for Int, bare and with Range predicates.

    Covers Range given one bound and two bounds, with each combination of
    ``inclusive_start`` / ``inclusive_end``.
    """
    yield ParamTemplate('single_int', Int, int, (-1, 0, 1))

    # One-argument Range.
    spec = Int % Range(3)
    yield ParamTemplate('int_range_1_param', spec, int, (-42, 0, 2))
    spec = Int % Range(3, inclusive_end=True)
    yield ParamTemplate('int_range_1_param_i_e', spec, int, (-43, 0, 3))

    # Two-argument Range from -3 to 4 with every inclusivity combination.
    spec = Int % Range(-3, 4)
    yield ParamTemplate('int_range_2_params', spec, int, (-3, 0, 3))
    spec = Int % Range(-3, 4, inclusive_end=True)
    yield ParamTemplate('int_range_2_params_i_e', spec, int, (-3, 0, 4))
    spec = Int % Range(-3, 4, inclusive_start=False)
    yield ParamTemplate('int_range_2_params_no_i', spec, int, (-2, 0, 3))
    spec = Int % Range(-3, 4, inclusive_start=False, inclusive_end=True)
    yield ParamTemplate('int_range_2_params_i_e_ex_s', spec, int, (-2, 0, 4))
Esempio n. 4
0
def float_params():
    """Yield ParamTemplate cases for Float, bare and with Range predicates.

    Covers Range given one bound and two bounds, with each combination of
    ``inclusive_start`` / ``inclusive_end``.  The sample tuples sit on or
    just inside the bound they probe: an inclusive bound is listed exactly,
    an exclusive one is approached (e.g. 3.49 for an exclusive 3.5).
    """
    yield ParamTemplate('single_float', Float, float, (-1.5, 0.0, 1.5))
    yield ParamTemplate('float_range_1_param', Float % Range(2.5), float,
                        (-42.5, 0.0, 2.49))
    yield ParamTemplate('float_range_1_param_i_e',
                        Float % Range(2.5, inclusive_end=True), float,
                        (-42.5, 0.0, 2.5))
    yield ParamTemplate('float_range_2_params', Float % Range(-3.5, 3.5),
                        float, (-3.5, 0.0, 3.49))
    yield ParamTemplate('float_range_2_params_i_e',
                        Float % Range(-3.5, 3.5, inclusive_end=True), float,
                        (-3.5, 0.0, 3.5))
    yield ParamTemplate('float_range_2_params_no_i',
                        Float % Range(-3.5, 3.5, inclusive_start=False), float,
                        (-3.49, 0.0, 3.49))
    # The end bound is inclusive in this case, so probe 3.5 itself.  With
    # 3.49 the case was indistinguishable from the exclusive-end case above
    # and the inclusive upper bound went untested.
    yield ParamTemplate(
        'float_range_2_params_i_e_ex_s',
        Float % Range(-3.5, 3.5, inclusive_start=False, inclusive_end=True),
        float, (-3.49, 0.0, 3.5))
Esempio n. 5
0
                 'untangled later.'),
    inputs={
        'sequences': FeatureData[Sequence]
    },
    outputs=[
        ('collapsed_kmers', FeatureData[Sequence]), 
        ('kmer_map', FeatureData[KmerMap]),
    ],
    parameters={
        'trim_length': Int,
        'region': Str,
        'fwd_primer': Str,
        'rev_primer': Str,
        'reverse_complement_rev': Bool,
        'reverse_complement_result': Bool,
        'chunk_size':  (Int % Range(1, None)),
        'n_workers': Int % Range(1, None),
        'client_address': Str,
        'debug': Bool,

    },
    input_descriptions={
        'sequences': 'The full length sequences from the reference database.',
    },
    output_descriptions={
        'collapsed_kmers': ('Reference kmer sequences for the region with '
                            'the degenerate sequences expanded and '
                            'duplicated sequences identified.'
                            ),
        'kmer_map': ('A mapping relationship between the name of the '
                     'sequence in the database and the kmer identifier used '
# Load citations.bib from the q2_phylogenomics package.
citations = Citations.load('citations.bib', package='q2_phylogenomics')

# Plugin object for q2-phylogenomics; description and short_description
# carry the same text.
plugin = Plugin(
    name='phylogenomics',
    version=q2_phylogenomics.__version__,
    website='https://github.com/qiime2/q2-phylogenomics',
    package='q2_phylogenomics',
    description='A QIIME 2 plugin for phylogenomics analyses.',
    short_description='A QIIME 2 plugin for phylogenomics analyses.',
)

# Description text for the prinseq input and output, keyed by name.
prinseq_input = {'demultiplexed_sequences': 'The sequences to be trimmed.'}
prinseq_output = {'trimmed_sequences': 'The resulting trimmed sequences.'}

# Type expression for each prinseq parameter, keyed by parameter name.
prinseq_parameters = {
    'trim_qual_right': Int % Range(1, None),
    'trim_qual_type': Str % Choices(['min', 'mean', 'max', 'sum']),
    'trim_qual_window': Int % Range(1, None),
    'min_qual_mean': Int % Range(1, None),
    'min_len': Int % Range(1, None),
    'lc_method': Str % Choices(['dust', 'entropy']),
    # NOTE(review): no inclusive_end is given; if Range excludes its end by
    # default, 100 itself is rejected -- confirm that is intended.
    'lc_threshold': Int % Range(0, 100),
    # list('12345') expands to the one-character strings '1' through '5'.
    'derep': List[Str % Choices(list('12345'))]}

prinseq_parameter_descriptions = {
    'trim_qual_right': 'Trim sequence by quality score from the 3\'-end with '
                       'this threshold score.',
    'trim_qual_type': 'Type of quality score calculation to use. Allowed '
                      'options are min, mean, max and sum.',
    'trim_qual_window': 'The sliding window size used to calculate quality '
                        'score by type. To stop at the first base that fails '
Esempio n. 7
0
# Load citations.bib from the q2_alignment package.
citations = Citations.load('citations.bib', package='q2_alignment')
# Plugin object that the registration below attaches to.
plugin = Plugin(
    name='alignment',
    version=q2_alignment.__version__,
    website='https://github.com/qiime2/q2-alignment',
    package='q2_alignment',
    description=('This QIIME 2 plugin provides support for generating '
                 'and manipulating sequence alignments.'),
    short_description='Plugin for generating and manipulating alignments.'
)

# Register q2_alignment.mafft as a method taking FeatureData[Sequence] and
# producing FeatureData[AlignedSequence].
plugin.methods.register_function(
    function=q2_alignment.mafft,
    inputs={'sequences': FeatureData[Sequence]},
    # The n_threads range starts at 0 because 0 is the "use all available
    # cores" value described in parameter_descriptions below.
    parameters={'n_threads': Int % Range(0, None),
                'parttree': Bool},
    outputs=[('alignment', FeatureData[AlignedSequence])],
    input_descriptions={'sequences': 'The sequences to be aligned.'},
    parameter_descriptions={
        'n_threads': 'The number of threads. (Use 0 to automatically use all '
                     'available cores)',
        'parttree': 'This flag is required if the number of sequences being '
                    'aligned are larger than 1000000. Disabled by default'},
    output_descriptions={'alignment': 'The aligned sequences.'},
    name='De novo multiple sequence alignment with MAFFT',
    description=("Perform de novo multiple sequence alignment using MAFFT."),
    citations=[citations['katoh2013mafft']]
)

plugin.methods.register_function(
Esempio n. 8
0
# Register the CSIFolder semantic type and tie it to CSIDirFmt.
plugin.register_semantic_types(CSIFolder)
plugin.register_semantic_type_to_format(CSIFolder,
                                        artifact_format=CSIDirFmt)

# Register the Molecules semantic type; FeatureData[Molecules] is tied to
# TSVMoleculesFormat, which is also registered as a view.
plugin.register_views(TSVMoleculesFormat)
plugin.register_semantic_types(Molecules)
plugin.register_semantic_type_to_format(FeatureData[Molecules],
                                        artifact_format=TSVMoleculesFormat)

# Type expression for each parameter, keyed by parameter name.  Every
# bounded numeric range below sets inclusive_end=True so the stated upper
# limit is itself accepted; n_jobs has no upper bound.
PARAMS = {
    'ionization_mode': Str % Choices(['positive', 'negative', 'auto']),
    'database': Str % Choices(['all', 'pubchem']),
    'sirius_path': Str,
    'profile': Str % Choices(['qtof', 'orbitrap', 'fticr']),
    'fingerid_db': Str % Choices(['all', 'pubchem', 'bio', 'kegg', 'hmdb']),
    'ppm_max': Int % Range(0, 30, inclusive_end=True),
    'n_jobs': Int % Range(1, None),
    'num_candidates': Int % Range(5, 100, inclusive_end=True),
    'tree_timeout': Int % Range(600, 3000, inclusive_end=True),
    'maxmz': Int % Range(100, 850, inclusive_end=True),
    'zodiac_threshold': Float % Range(0, 1, inclusive_end=True),
    'java_flags': Str
}

PARAMS_DESC = {
    'ionization_mode': 'Ionization mode for mass spectrometry',
    'database': 'search formulas in given database',
    'sirius_path': 'path to Sirius executable',
    'ppm_max': 'allowed parts per million tolerance for decomposing masses',
    'profile': 'configuration profile for mass-spec platform used',
    'n_jobs': 'Number of cpu cores to use',
Esempio n. 9
0
    'the estimator.')

# Input name -> semantic type.
inputs = {'table': FeatureTable[Frequency]}

# Input name -> description text.
# NOTE(review): 'probabilities' has no counterpart in `inputs` above;
# presumably it describes an input declared elsewhere -- confirm.
input_descriptions = {
    'table': 'Feature table containing all features that '
    'should be used for target prediction.',
    'probabilities': 'Predicted class probabilities for '
    'each input sample.'
}

parameters = {
    'base': {
        'random_state': Int,
        'n_jobs': Int,
        'n_estimators': Int % Range(1, None),
        'missing_samples': Str % Choices(['error', 'ignore'])
    },
    'splitter': {
        'test_size':
        Float % Range(0.0, 1.0, inclusive_end=False, inclusive_start=False)
    },
    'rfe': {
        'step':
        Float % Range(0.0, 1.0, inclusive_end=False, inclusive_start=False),
        'optimize_feature_selection': Bool
    },
    'cv': {
        'cv': Int % Range(1, None),
        'parameter_tuning': Bool
    },
Esempio n. 10
0
# Load citations.bib from the q2_feature_table package.
citations = Citations.load('citations.bib', package='q2_feature_table')
# Plugin object for q2-feature-table; the version is taken from the
# package's __version__.
plugin = Plugin(
    name='feature-table',
    version=q2_feature_table.__version__,
    website='https://github.com/qiime2/q2-feature-table',
    package='q2_feature_table',
    short_description=('Plugin for working with sample by feature tables.'),
    description=('This is a QIIME 2 plugin supporting operations on sample '
                 'by feature tables, such as filtering, merging, and '
                 'transforming tables.'))

plugin.methods.register_function(
    function=q2_feature_table.rarefy,
    inputs={'table': FeatureTable[Frequency]},
    parameters={
        'sampling_depth': Int % Range(1, None),
        'with_replacement': Bool
    },
    outputs=[('rarefied_table', FeatureTable[Frequency])],
    input_descriptions={'table': 'The feature table to be rarefied.'},
    parameter_descriptions={
        'sampling_depth': ('The total frequency that each sample should be '
                           'rarefied to. Samples where the sum of frequencies '
                           'is less than the sampling depth will be not be '
                           'included in the resulting table unless '
                           'subsampling is performed with replacement.'),
        'with_replacement': ('Rarefy with replacement by sampling from the '
                             'multinomial distribution instead of rarefying '
                             'without replacement.')
    },
    output_descriptions={
Esempio n. 11
0
    citations=[]
)

plugin.visualizers.register_function(
    function=heatmap,
    inputs={'ranks': FeatureData[Conditional]},
    parameters={
        'microbe_metadata': MetadataColumn[Categorical],
        'metabolite_metadata': MetadataColumn[Categorical],
        'method': Str % Choices(_heatmap_choices['method']),
        'metric': Str % Choices(_heatmap_choices['metric']),
        'color_palette': Str % Choices(_cmaps['heatmap']),
        'margin_palette': Str % Choices(_cmaps['margins']),
        'x_labels': Bool,
        'y_labels': Bool,
        'level': Int % Range(-1, None),
        'row_center': Bool,
    },
    input_descriptions={'ranks': 'Conditional probabilities.'},
    parameter_descriptions={
        'microbe_metadata': 'Optional microbe metadata for annotating plots.',
        'metabolite_metadata': 'Optional metabolite metadata for annotating '
                               'plots.',
        'method': 'Hierarchical clustering method used in clustermap.',
        'metric': 'Distance metric used in clustermap.',
        'color_palette': 'Color palette for clustermap.',
        'margin_palette': 'Name of color palette to use for annotating '
                          'metadata along margin(s) of clustermap.',
        'x_labels': 'Plot x-axis (metabolite) labels?',
        'y_labels': 'Plot y-axis (microbe) labels?',
        'level': 'taxonomic level for annotating clustermap. Set to -1 if not '
Esempio n. 12
0
              'selected. If input sequences are paired end, plots will be '
              'generated for both forward and reverse reads for the same `n` '
              'sequences.')
    },
    name='Summarize counts per sample.',
    description=('Summarize counts per sample for all samples, and generate '
                 'interactive positional quality plots based on `n` randomly '
                 'selected sequences.')
)

plugin.methods.register_function(
    function=q2_demux.subsample_single,
    inputs={'sequences': SampleData[SequencesWithQuality |
                                    PairedEndSequencesWithQuality]},
    parameters={'fraction': Float % Range(0, 1,
                                          inclusive_start=False,
                                          inclusive_end=False)},
    outputs=[
        ('subsampled_sequences', SampleData[SequencesWithQuality])
    ],
    input_descriptions={
        'sequences': 'The demultiplexed sequences to be subsampled.'
    },
    parameter_descriptions={
        'fraction': ('The fraction of sequences to retain in subsample.')
    },
    output_descriptions={
        'subsampled_sequences': 'The subsampled sequences.'
    },
    name='Subsample single-end sequences without replacement.',
    description=('Generate a random subsample of single-end sequences '
Esempio n. 13
0
                    'Accent', 'Dark2', 'tab10', 'tab20', 'tab20b', 'tab20c',
                    'viridis', 'plasma', 'inferno', 'magma', 'terrain',
                    'rainbow'])}

# Description text for the 'depth' and 'palette' entries, keyed by name.
taxa_inputs_descriptions = {
    'depth': 'Maximum depth of semicolon-delimited taxonomic ranks to '
             'test (e.g., 1 = root, 7 = species for the greengenes '
             'reference sequence database).',
    'palette': 'Color palette to utilize for plotting.'}


plugin.methods.register_function(
    function=exclude_seqs,
    inputs=seq_inputs,
    parameters={'method': Str % Choices(['blast', 'vsearch', 'blastn-short']),
                'perc_identity': Float % Range(0.0, 1.0, inclusive_end=True),
                'evalue': Float,
                'perc_query_aligned': Float,
                'threads': Int % Range(1, None)},
    outputs=[('sequence_hits', FeatureData[Sequence]),
             ('sequence_misses', FeatureData[Sequence])],
    input_descriptions=seq_inputs_descriptions,
    parameter_descriptions={
        'method': ('Alignment method to use for matching feature sequences '
                   'against reference sequences'),
        'perc_identity': ('Reject match if percent identity to reference is '
                          'lower. Must be in range [0.0, 1.0]'),
        'evalue': ('BLAST expectation (E) value threshold for saving hits. '
                   'Reject if E value is higher than threshold. This '
                   'threshold is disabled by default.'),
        'perc_query_aligned': (
Esempio n. 14
0
                 'metadata.'),
    short_description='Plugin for exploring community diversity.',
)

plugin.pipelines.register_function(
    function=q2_diversity.beta_phylogenetic,
    inputs={
        'table': FeatureTable[Frequency | RelativeFrequency | PresenceAbsence],
        'phylogeny': Phylogeny[Rooted]
    },
    parameters={
        'metric':
        Str % Choices(beta.METRICS['PHYLO']['IMPL']
                      | beta.METRICS['PHYLO']['UNIMPL']),
        'threads':
        Int % Range(1, None) | Str % Choices(['auto']),
        'variance_adjusted':
        Bool,
        'alpha':
        Float % Range(0, 1, inclusive_end=True),
        'bypass_tips':
        Bool
    },
    outputs=[('distance_matrix', DistanceMatrix)],
    input_descriptions={
        'table': ('The feature table containing the samples over which beta '
                  'diversity should be computed.'),
        'phylogeny': ('Phylogenetic tree containing tip identifiers that '
                      'correspond to the feature identifiers in the table. '
                      'This tree can contain tip ids that are not present in '
                      'the table, but all feature ids in the table must be '
Esempio n. 15
0
    return result


@plugin.register_transformer
def _2(fmt: IDSelectionDirFmt) -> qiime2.Metadata:
    """Return the directory's metadata restricted to its included IDs."""
    full_metadata = fmt.metadata.view(IDMetadataFormat).to_metadata()
    included_ids = fmt.included.view(UNIXListFormat).to_list()
    return full_metadata.filter_ids(included_ids)


# Register subsample_random as a method.  It takes no artifact inputs; the
# IDs come in through the 'ids' Metadata parameter.
plugin.methods.register_function(
    function=subsample_random,
    inputs={},
    parameters={
        'ids': Metadata,
        # Range(1, None): lower bound 1, no upper bound given.
        'n': Int % Range(1, None),
        'seed': Int
    },
    outputs=[('selection', FeatureData[Selection])],
    parameter_descriptions={
        'ids': 'IDs to subsample from.',
        'n': 'Number of IDs to sample.',
        'seed': 'Random seed to use to initialize random number generator.'
    },
    input_descriptions={},
    output_descriptions={
        'selection': 'The selected IDs.'
    },
    name='Randomly sample IDs',
    description='Randomly sample IDs without replacement.'
)
Esempio n. 16
0
    return consensus


plugin.methods.register_function(
    function=classify_consensus_blast,
    inputs={
        'query': FeatureData[Sequence],
        'reference_reads': FeatureData[Sequence],
        'reference_taxonomy': FeatureData[Taxonomy]
    },
    parameters={
        'evalue':
        Float,
        'maxaccepts':
        Int % Range(1, None),
        'perc_identity':
        Float % Range(0.0, 1.0, inclusive_end=True),
        'strand':
        Str % Choices(['both', 'plus', 'minus']),
        'min_consensus':
        Float % Range(0.5, 1.0, inclusive_end=True, inclusive_start=False),
        'unassignable_label':
        Str
    },
    outputs=[('classification', FeatureData[Taxonomy])],
    input_descriptions={
        'query': 'Sequences to classify taxonomically.',
        'reference_reads': 'reference sequences.',
        'reference_taxonomy': 'reference taxonomy labels.'
    },
Esempio n. 17
0
                    if amplicon is not None:
                        skbio.write(amplicon, format='fasta', into=fh)
    if os.stat(str(ff)).st_size == 0:
        raise RuntimeError("No matches found")
    return ff


plugin.methods.register_function(
    function=extract_reads,
    inputs={'sequences': FeatureData[Sequence]},
    parameters={'trunc_len': Int,
                'trim_left': Int,
                'f_primer': Str,
                'r_primer': Str,
                'identity': Float,
                'min_length': Int % Range(0, None),
                'max_length': Int % Range(0, None),
                'n_jobs': Int % Range(1, None),
                'batch_size': Int % Range(1, None) | Str % Choices(['auto'])},
    outputs=[('reads', FeatureData[Sequence])],
    name='Extract reads from reference',
    description='Extract sequencing-like reads from a reference database.',
    parameter_descriptions={'f_primer': 'forward primer sequence',
                            'r_primer': 'reverse primer sequence',
                            'trunc_len': 'read is cut to trunc_len if '
                                         'trunc_len is positive. Applied '
                                         'before trim_left.',
                            'trim_left': 'trim_left nucleotides are removed '
                                         'from the 5\' end if trim_left is '
                                         'positive. Applied after trunc_len.',
                            'identity': 'minimum combined primer match '
Esempio n. 18
0
                description='',
                short_description='')

# Register the file and directory formats used by this plugin.
plugin.register_formats(PosteriorLogFormat, NexusFormat,
                        BEASTControlFileFormat, BEASTOpsFileFormat,
                        BEASTPosteriorDirFmt, NexusDirFmt)

# Register the semantic types and tie Chain[BEAST] and Phylogeny[MCC] to
# their directory formats.
plugin.register_semantic_types(Chain, BEAST, MCC)
plugin.register_semantic_type_to_format(Chain[BEAST],
                                        artifact_format=BEASTPosteriorDirFmt)
plugin.register_semantic_type_to_format(Phylogeny[MCC],
                                        artifact_format=NexusDirFmt)

# Imported only for its import-time effects; the return value is discarded
# (presumably this registers the transformers -- confirm).
importlib.import_module('q2_beast.transformers')

# Shared Int aliases for the parameter declarations that follow.
# NOTE(review): NONZERO_INT starts at 1, so it excludes negatives as well
# as zero, i.e. it is a positive-integer type despite the name.
NONZERO_INT = Int % Range(1, None)
NONNEGATIVE_INT = Int % Range(0, None)

plugin.methods.register_function(
    function=gtr_single_partition,
    inputs={'alignment': FeatureData[AlignedSequence]},
    parameters={
        'time': MetadataColumn[Numeric],
        'n_generations': NONZERO_INT,
        'sample_every': NONZERO_INT,
        'time_uncertainty': MetadataColumn[Numeric],
        'base_freq': Str % Choices("estimated", "empirical"),
        'site_gamma': Int % Range(0, 10, inclusive_end=True),
        'site_invariant': Bool,
        'clock': Str % Choices("ucln", "strict"),
        'coalescent_model':
Esempio n. 19
0
# Load citations.bib from the q2_alignment package.
citations = Citations.load('citations.bib', package='q2_alignment')
# Plugin object for q2-alignment; the version is taken from the package's
# __version__.
plugin = Plugin(
    name='alignment',
    version=q2_alignment.__version__,
    website='https://github.com/qiime2/q2-alignment',
    package='q2_alignment',
    description=('This QIIME 2 plugin provides support for generating '
                 'and manipulating sequence alignments.'),
    short_description='Plugin for generating and manipulating alignments.')

plugin.methods.register_function(
    function=q2_alignment.mafft,
    inputs={'sequences': FeatureData[Sequence]},
    parameters={
        'n_threads': Int % Range(1, None) | Str % Choices(['auto']),
        'parttree': Bool
    },
    outputs=[('alignment', FeatureData[AlignedSequence])],
    input_descriptions={'sequences': 'The sequences to be aligned.'},
    parameter_descriptions={
        'n_threads':
        'The number of threads. (Use `auto` to automatically use '
        'all available cores)',
        'parttree':
        'This flag is required if the number of sequences being '
        'aligned are larger than 1000000. Disabled by default'
    },
    output_descriptions={'alignment': 'The aligned sequences.'},
    name='De novo multiple sequence alignment with MAFFT',
    description=("Perform de novo multiple sequence alignment using MAFFT."),
Esempio n. 20
0
        'metadata': base_parameter_descriptions['metadata'],
        'x': coords_description.format('x'),
        'y': coords_description.format('y'),
        'z': coords_description.format('z'),
        'missing_data': base_parameter_descriptions['missing_data']},
    name='Create a distance matrix from 2D or 3D cartesian coordinates.',
    description='Measure pairwise euclidean distances between cartesian '
                'coordinates. '
                'Note that samples with missing values are silently dropped.',
)

plugin.visualizers.register_function(
    function=autocorr,
    inputs={'distance_matrix': DistanceMatrix},
    parameters={'metadata': MetadataColumn[Numeric],
                'permutations': Int % Range(0, None),
                'two_tailed': Bool,
                'transformation': Str % Choices(['R', 'B', 'D', 'V']),
                'intersect_ids': Bool},
    input_descriptions={'distance_matrix': 'Spatial distance matrix'},
    parameter_descriptions={
        'metadata': 'Variable to test for spatial autocorrelation.',
        'permutations': 'Number of random permutations for calculation of '
                        'pseudo p-values.',
        'two_tailed': 'If True (default) analytical p-values for Moran are '
                      'two tailed, otherwise if False, they are one-tailed. '
                      'This does not apply to Geary\'s C.',
        'transformation': 'Weights transformation, default is "R" '
                          '(row-standardized). Other options include "B": '
                          'binary, "D": doubly-standardized, "V": '
                          'variance-stabilizing.',
Esempio n. 21
0
    },
    output_descriptions={
        'correlation_table':
        'The resulting table of pairwise correlations with R and p-value.'
    },
    name='Build pairwise correlations between observations',
    description=(
        'Build pairwise correlations between all observations in feature table'
    ),
)

plugin.methods.register_function(
    function=build_correlation_network_r,
    inputs={'correlation_table': PairwiseFeatureData},
    parameters={
        'min_val': Float % Range(0, 1, inclusive_end=True),
        'cooccur': Bool
    },
    outputs=[('correlation_network', Network)],
    input_descriptions={
        'correlation_table':
        ('Pairwise feature data table of correlations with r value.')
    },
    parameter_descriptions={
        'min_val':
        'The minimum r value to say an edge should exist.',
        'cooccur':
        'Whether or not to constrain the network to only positive edges.'
    },
    output_descriptions={'correlation_network': 'The resulting network.'},
    name='Build a correlation network based on an r value cutoff',
Esempio n. 22
0
 def test_collection_primitive(self):
     """Round-trip Set and List collections of predicated primitives."""
     # A set whose members are drawn from three string choices.
     letters = Str % Choices('A', 'B', 'C')
     self.assert_roundtrip(Set[letters])

     # A list whose members are a small inclusive int range or a letter.
     small_int = Int % Range(1, 3, inclusive_end=True)
     self.assert_roundtrip(List[small_int | Str % Choices('A', 'B', 'C')])
Esempio n. 23
0
# Register q2_phylogeny.midpoint_root as a method: it takes a
# Phylogeny[Unrooted] tree, has no parameters, and produces a
# Phylogeny[Rooted] tree.
plugin.methods.register_function(
    function=q2_phylogeny.midpoint_root,
    inputs={'tree': Phylogeny[Unrooted]},
    parameters={},
    outputs=[('rooted_tree', Phylogeny[Rooted])],
    input_descriptions={'tree': 'The phylogenetic tree to be rooted.'},
    parameter_descriptions={},
    output_descriptions={'rooted_tree': 'The rooted phylogenetic tree.'},
    name='Midpoint root an unrooted phylogenetic tree.',
    description=("Midpoint root an unrooted phylogenetic tree."))

plugin.methods.register_function(
    function=q2_phylogeny.fasttree,
    inputs={'alignment': FeatureData[AlignedSequence]},
    parameters={'n_threads': Int % Range(0, None)},
    outputs=[('tree', Phylogeny[Unrooted])],
    input_descriptions={
        'alignment': ('Aligned sequences to be used for phylogenetic '
                      'reconstruction.')
    },
    parameter_descriptions={
        'n_threads':
        'The number of threads. Using more than one thread '
        'runs the non-deterministic variant of `FastTree` '
        '(`FastTreeMP`), and may result in a different tree than '
        'single-threading. See '
        'http://www.microbesonline.org/fasttree/#OpenMP for '
        'details. (Use 0 to automatically use all available '
        'cores)'
    },
Esempio n. 24
0
# Plugin object for q2-ipcress; the version is taken from the package's
# __version__.
plugin = Plugin(
    name='ipcress',
    version=q2_ipcress.__version__,
    website='https://github.com/BenKaehler/q2-ipcress',
    package='q2_ipcress',
    description=('This QIIME 2 plugin provides support for generating '
                 'synthetic PCR reads from a set of reference sequences.'),
    short_description='Wrapper for ipcress, an in-silico PCR program.')

plugin.methods.register_function(
    function=q2_ipcress.ipcress,
    inputs={'sequence': FeatureData[Sequence]},
    parameters={
        'primer_a': Str,
        'primer_b': Str,
        'min_product_len': Int % Range(0, None),
        'max_product_len': Int % Range(0, None),
        'mismatch': Int % Range(0, None),
        'memory': Int % Range(0, None),
        'seed': Int % Range(0, None)
    },
    outputs=[('reads', FeatureData[Sequence])],
    name='Run in-silico PCR on references',
    description='Extract sequencing-like reads from a reference database.',
    parameter_descriptions={
        'primer_a':
        'Sequence for the first primer',
        'primer_b':
        'Sequence for the second primer',
        'min_product_len':
        'Minimum product length to report',
Esempio n. 25
0
# plugin.register_semantic_type_to_format(ReferenceSequence, GenBankDirFmt)
# Tie each semantic type to the directory format that stores it.
plugin.register_semantic_type_to_format(ReferenceSequence,
                                        DNASequencesDirectoryFormat)
plugin.register_semantic_type_to_format(SampleData[PileUp], PileUpFilesDirFmt)
plugin.register_semantic_type_to_format(SampleData[AlignmentMap],
                                        BAMFilesDirFmt)
plugin.register_semantic_type_to_format(SampleData[ConsensusSequences],
                                        FASTAFilesDirFmt)

# Imported only for its import-time effects; the return value is discarded
# (presumably this registers the transformers -- confirm).
importlib.import_module('q2_phylogenomics._transformers')

# Description text for the prinseq input and output, keyed by name.
prinseq_input = {'demultiplexed_sequences': 'The sequences to be trimmed.'}
prinseq_output = {'trimmed_sequences': 'The resulting trimmed sequences.'}

# Type expression for each prinseq parameter, keyed by parameter name.
prinseq_parameters = {
    'trim_qual_right': Int % Range(1, None),
    'trim_qual_type': Str % Choices(['min', 'mean', 'max', 'sum']),
    'trim_qual_window': Int % Range(1, None),
    'min_qual_mean': Int % Range(1, None),
    'min_len': Int % Range(1, None),
    'lc_method': Str % Choices(['dust', 'entropy']),
    # NOTE(review): no inclusive_end is given; if Range excludes its end by
    # default, 100 itself is rejected -- confirm that is intended.
    'lc_threshold': Int % Range(0, 100),
    # list('12345') expands to the one-character strings '1' through '5'.
    'derep': List[Str % Choices(list('12345'))]
}

prinseq_parameter_descriptions = {
    'trim_qual_right':
    'Trim sequence by quality score from the 3\'-end with '
    'this threshold score.',
    'trim_qual_type':
    'Type of quality score calculation to use. Allowed '
Esempio n. 26
0
        'are present in a fully comprehensive reference database. To simulate '
        'more realistic conditions, see `evaluate_cross_validate`. THE '
        'CLASSIFIER OUTPUT BY THIS PIPELINE IS PRODUCTION-READY and can be '
        're-used for classification of other sequences (provided the '
        'reference data are viable), hence THIS PIPELINE IS USEFUL FOR '
        'TRAINING FEATURE CLASSIFIERS AND THEN EVALUATING THEM ON-THE-FLY.'),
    citations=[citations['bokulich2018optimizing']]
)


plugin.pipelines.register_function(
    function=evaluate_cross_validate,
    inputs={'sequences': FeatureData[Sequence],
            'taxonomy': FeatureData[Taxonomy]},
    parameters={
        'k': Int % Range(2, None),
        'random_state': Int % Range(0, None),
        'reads_per_batch': _classify_parameters['reads_per_batch'],
        'n_jobs': _classify_parameters['n_jobs'],
        'confidence': _classify_parameters['confidence']},
    outputs=[('expected_taxonomy', FeatureData[Taxonomy]),
             ('observed_taxonomy', FeatureData[Taxonomy]),
             ('evaluation', Visualization)],
    input_descriptions={
        'sequences': 'Reference sequences to use for classifier '
                     'training/testing.',
        'taxonomy': 'Reference taxonomy to use for classifier '
                    'training/testing.'},
    parameter_descriptions={
        'k': 'Number of stratified folds.',
        'random_state': 'Seed used by the random number generator.',
Esempio n. 27
0
    function=params_only_method,
    inputs={},
    parameters={
        'name': Str,
        'age': Int
    },
    outputs=[('out', Mapping)],
    name='Parameters only method',
    description='This method only accepts parameters.',
)

# Register unioned_primitives: no artifact inputs, two parameters that each
# accept either an Int from 1 upward or one specific string.
dummy_plugin.methods.register_function(
    function=unioned_primitives,
    inputs={},
    parameters={
        'foo': Int % Range(1, None) | Str % Choices(['auto_foo']),
        'bar': Int % Range(1, None) | Str % Choices(['auto_bar']),
    },
    outputs=[('out', Mapping)],
    name='Unioned primitive parameter',
    description='This method has a unioned primitive parameter')

# Register no_input_method: neither inputs nor parameters, one Mapping
# output.
dummy_plugin.methods.register_function(
    function=no_input_method,
    inputs={},
    parameters={},
    outputs=[('out', Mapping)],
    name='No input method',
    description='This method does not accept any type of input.')

dummy_plugin.methods.register_function(
Esempio n. 28
0
    description='This QIIME 2 plugin uses cutadapt to work with '
                'adapters (e.g. barcodes, primers) in sequence data.',
    short_description='Plugin for removing adapter sequences, primers, and '
                      'other unwanted sequence from sequence data.',
    citation_text='Martin, M. (2011). Cutadapt removes adapter sequences from '
                  'high-throughput sequencing reads. EMBnet.Journal, 17(1), '
                  'pp. 10-12.\ndoi:http://dx.doi.org/10.14806/ej.17.1.200',
)

plugin.methods.register_function(
    function=q2_cutadapt._trim.trim_single,
    inputs={
        'demultiplexed_sequences': SampleData[SequencesWithQuality],
    },
    parameters={
        'cores': Int % Range(1, None),
        'adapter': List[Str],
        'front': List[Str],
        'anywhere': List[Str],
        'error_rate': Float % Range(0, 1, inclusive_start=True,
                                    inclusive_end=True),
        'indels': Bool,
        'times': Int % Range(1, None),
        'overlap': Int % Range(1, None),
        'match_read_wildcards': Bool,
        'match_adapter_wildcards': Bool,
    },
    outputs=[
        ('trimmed_sequences', SampleData[SequencesWithQuality]),
    ],
    input_descriptions={
Esempio n. 29
0
    description=('This QIIME 2 plugin supports metrics for calculating '
                 'and exploring community alpha and beta diversity through '
                 'statistics and visualizations in the context of sample '
                 'metadata.'),
    short_description='Plugin for exploring community diversity.',
)

plugin.methods.register_function(
    function=q2_diversity.beta_phylogenetic,
    inputs={
        'table': FeatureTable[Frequency],
        'phylogeny': Phylogeny[Rooted]
    },
    parameters={
        'metric': Str % Choices(beta.phylogenetic_metrics()),
        'n_jobs': Int % Range(1, None)
    },
    outputs=[('distance_matrix', DistanceMatrix % Properties('phylogenetic'))],
    input_descriptions={
        'table': ('The feature table containing the samples over which beta '
                  'diversity should be computed.'),
        'phylogeny': ('Phylogenetic tree containing tip identifiers that '
                      'correspond to the feature identifiers in the table. '
                      'This tree can contain tip ids that are not present in '
                      'the table, but all feature ids in the table must be '
                      'present in this tree.')
    },
    parameter_descriptions={
        'metric': 'The beta diversity metric to be computed.',
        'n_jobs':
        '[Excluding weighted_unifrac] - %s' % sklearn_n_jobs_description
Esempio n. 30
0
        'are present in a fully comprehensive reference database. To simulate '
        'more realistic conditions, see `evaluate_cross_validate`. THE '
        'CLASSIFIER OUTPUT BY THIS PIPELINE IS PRODUCTION-READY and can be '
        're-used for classification of other sequences (provided the '
        'reference data are viable), hence THIS PIPELINE IS USEFUL FOR '
        'TRAINING FEATURE CLASSIFIERS AND THEN EVALUATING THEM ON-THE-FLY.'),
    citations=[citations['bokulich2018optimizing']])

plugin.pipelines.register_function(
    function=evaluate_cross_validate,
    inputs={
        'sequences': FeatureData[Sequence],
        'taxonomy': FeatureData[Taxonomy]
    },
    parameters={
        'k': Int % Range(2, None),
        'random_state': Int % Range(0, None),
        'reads_per_batch': _classify_parameters['reads_per_batch'],
        'n_jobs': _classify_parameters['n_jobs'],
        'confidence': _classify_parameters['confidence']
    },
    outputs=[('expected_taxonomy', FeatureData[Taxonomy]),
             ('observed_taxonomy', FeatureData[Taxonomy]),
             ('evaluation', Visualization)],
    input_descriptions={
        'sequences': 'Reference sequences to use for classifier '
        'training/testing.',
        'taxonomy': 'Reference taxonomy to use for classifier '
        'training/testing.'
    },
    parameter_descriptions={