# ----------------------------------------------------------------------------
# Copyright (c) 2016-2017, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import skbio.io
import qiime2.plugin.model as model

from ..plugin_setup import plugin


class NewickFormat(model.TextFileFormat):
    """Text file format whose content check is done by scikit-bio."""

    def sniff(self):
        """Return the first element of the 'newick' sniffer's result.

        The sniffer registered under the name ``'newick'`` is looked up in
        ``skbio.io.io_registry`` and called with ``str(self)``.
        """
        newick_sniffer = skbio.io.io_registry.get_sniffer('newick')
        outcome = newick_sniffer(str(self))
        return outcome[0]


# Single-file directory format: member 'tree.nwk' is typed as NewickFormat.
NewickDirectoryFormat = model.SingleFileDirectoryFormat(
    'NewickDirectoryFormat',
    'tree.nwk',
    NewickFormat,
)

plugin.register_formats(NewickFormat, NewickDirectoryFormat)
from qiime2.plugin import SemanticType, model
from q2_types.feature_data import FeatureData

# Semantic type declared as a variant of FeatureData's ``type`` field.
Factors = SemanticType('Factors', variant_of=FeatureData.field['type'])


class FactorsFormat(model.TextFileFormat):
    """Text file format for ``Factors`` data.

    No content checks are implemented yet.
    """

    # Update with required formatting
    def validate(self, *args, **kwargs):
        """Accept the file unconditionally.

        Any positional or keyword arguments (for example ``level='min'``)
        are accepted and ignored, so keyword-style calls do not raise
        ``TypeError``.
        """


FactorsDirFmt = model.SingleFileDirectoryFormat(
    'FactorsDirFmt', 'factors.tsv', FactorsFormat)
class SingleIntFormat(TextFileFormat):
    """
    Exactly one int on a single line in the file.

    """

    def _validate_(self, level):
        """Check that the file holds one integer line and nothing after it.

        Raises ``ValidationError`` when the first line cannot be converted
        with ``int`` or when a second read returns more content. ``level``
        is not consulted.
        """
        with self.open() as handle:
            try:
                # The read stays inside the ``try`` so that any ValueError
                # it raises is reported the same way as a bad integer.
                int(handle.readline().rstrip('\n'))
            except (TypeError, ValueError):
                raise ValidationError("File does not contain an integer")

            leftover = handle.readline()
            if leftover != '':
                raise ValidationError("Too many lines in file.")


IntSequenceDirectoryFormat = model.SingleFileDirectoryFormat(
    'IntSequenceDirectoryFormat',
    'ints.txt',
    IntSequenceFormat,
)

IntSequenceV2DirectoryFormat = model.SingleFileDirectoryFormat(
    'IntSequenceV2DirectoryFormat',
    'integers.txt',
    IntSequenceFormatV2,
)


class IntSequenceMultiFileDirectoryFormat(model.DirectoryFormat):
    """Directory format that declares no members of its own."""


# This could have been a `SingleFileDirectoryFormat`, but isn't for testing
# purposes
class MappingDirectoryFormat(model.DirectoryFormat):
    """Directory format with one member, 'mapping.tsv' (MappingFormat)."""

    mapping = model.File('mapping.tsv', format=MappingFormat)
def sniff(self): try: with self.open() as fh: for grp in self.groups: if grp not in fh: return False for ds in self.datasets: if ds not in fh: return False for attr in self.attrs: if attr not in fh.attrs: return False return True except Exception: return False BIOMV100DirFmt = model.SingleFileDirectoryFormat('BIOMV100DirFmt', 'feature-table.biom', BIOMV100Format) BIOMV210DirFmt = model.SingleFileDirectoryFormat('BIOMV210DirFmt', 'feature-table.biom', BIOMV210Format) plugin.register_views(BIOMV100Format, BIOMV210Format, BIOMV100DirFmt, BIOMV210DirFmt, biom.Table, citations=[citations['mcdonald2012biological']])
# # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- from qiime2.plugin import model class SeqAlnMapFormat(model.TextFileFormat): def _validate_(self, level): pass SeqAlnMapDirFmt = model.SingleFileDirectoryFormat('SeqAlnMapDirFmt', 'alignment.sam', SeqAlnMapFormat) class BLAST6OutFormat(model.TextFileFormat): def _validate_(self, level): pass BLAST6OutDirFmt = model.SingleFileDirectoryFormat('BLAST6OutDirFmt', 'alignment.b6o', BLAST6OutFormat) class SimpleMapFormat(model.TextFileFormat): def _validate_(self, level):
from qiime2.plugin import SemanticType, model
from q2_types.feature_data import FeatureData

# Semantic type declared as a variant of FeatureData's ``type`` field.
Conditional = SemanticType('Conditional',
                           variant_of=FeatureData.field['type'])


class ConditionalFormat(model.TextFileFormat):
    """Text file format for ``Conditional`` data; nothing is checked."""

    def validate(self, *args, **kwargs):
        """Accept the file unconditionally.

        Any positional or keyword arguments (for example ``level='min'``)
        are accepted and ignored, so keyword-style calls do not raise
        ``TypeError``.
        """


ConditionalDirFmt = model.SingleFileDirectoryFormat(
    'ConditionalDirFmt', 'conditionals.tsv', ConditionalFormat)
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2018, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import qiime2.plugin.model as model

from ..plugin_setup import plugin


class AlphaDiversityFormat(model.TextFileFormat):
    """Tab-separated text file, checked by inspecting its leading lines."""

    def sniff(self):
        """Inspect up to the first ten lines of the file.

        Returns False as soon as an inspected line contains no tab
        character (that is, it splits into fewer than two cells).
        Otherwise returns True only when the zero-based index of the last
        inspected line is greater than 1, i.e. at least three lines were
        read.
        """
        with self.open() as handle:
            last_index = 0
            for row, last_index in zip(handle, range(10)):
                # A row without any tab splits into fewer than two cells.
                if '\t' not in row:
                    return False
            # NOTE(review): a file with only two lines is rejected here --
            # confirm that requiring three lines is intended.
            return last_index > 1


AlphaDiversityDirectoryFormat = model.SingleFileDirectoryFormat(
    'AlphaDiversityDirectoryFormat',
    'alpha-diversity.tsv',
    AlphaDiversityFormat,
)

plugin.register_formats(AlphaDiversityFormat, AlphaDiversityDirectoryFormat)
for cell in cells[1:]: try: float(cell) except ValueError: raise ValidationError( "Expected data to be comprised of float values. " "Found non-float value {0} at line {1}".format( cell, line_number)) has_data = True if n_records is not None and (line_number - 1) >= n_records: break _validate_file_not_empty(has_data) CoordinatesDirectoryFormat = model.SingleFileDirectoryFormat( 'CoordinatesDirectoryFormat', 'coordinates.tsv', CoordinatesFormat) class QuadTreeFormat(model.TextFileFormat): def _validate_(self, level): with self.open() as fh: header, records_seen, is_min = None, 0, level == 'min' fh_ = csv.reader(fh, delimiter='\t') file_ = enumerate(fh_, 1) if is_min else zip(range(1, 11), fh_) for i, cells in file_: if header is None: if len(cells) < 2: raise ValidationError( 'Found header on line %d with the following ' 'columns: %s (length: %d), expected at least 2 ' 'columns.' % (i, cells, len(cells)))
class SingleIntFormat(TextFileFormat): """ Exactly one int on a single line in the file. """ def _validate_(self, level): with self.open() as fh: try: int(fh.readline().rstrip('\n')) except (TypeError, ValueError): raise ValidationError("File does not contain an integer") if fh.readline(): raise ValidationError("Too many lines in file.") IntSequenceDirectoryFormat = model.SingleFileDirectoryFormat( 'IntSequenceDirectoryFormat', 'ints.txt', IntSequenceFormat) IntSequenceV2DirectoryFormat = model.SingleFileDirectoryFormat( 'IntSequenceV2DirectoryFormat', 'integers.txt', IntSequenceFormatV2) # This could have been a `SingleFileDirectoryFormat`, but isn't for testing # purposes class MappingDirectoryFormat(model.DirectoryFormat): mapping = model.File('mapping.tsv', format=MappingFormat) class FourIntsDirectoryFormat(model.DirectoryFormat): """ A sequence of exactly four integers stored across multiple files, some of which are in a nested directory. Each file contains a single integer.
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2020, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import skbio.io
import qiime2.plugin.model as model

from ..plugin_setup import plugin


class LSMatFormat(model.TextFileFormat):
    """Text file format whose content check is done by scikit-bio."""

    def sniff(self):
        """Return the first element of the 'lsmat' sniffer's result.

        The sniffer is fetched from ``skbio.io.io_registry`` by name and
        invoked on ``str(self)``.
        """
        path = str(self)
        lsmat_sniffer = skbio.io.io_registry.get_sniffer('lsmat')
        return lsmat_sniffer(path)[0]


# Single-file directory format: 'distance-matrix.tsv' typed as LSMatFormat.
DistanceMatrixDirectoryFormat = model.SingleFileDirectoryFormat(
    'DistanceMatrixDirectoryFormat',
    'distance-matrix.tsv',
    LSMatFormat,
)

plugin.register_formats(LSMatFormat, DistanceMatrixDirectoryFormat)
for line_number, line in enumerate(fh, start=2): cells = line.strip().split('\t') _validate_record_len(cells, line_number, 2) if str(cells[1]) not in ('True', 'False'): raise ValidationError( "Expected data to be comprised of values `True` and " "`False`, found {0} at line {1}.".format( str(cells[1]), line_number)) has_data = True if n_records is not None and (line_number - 1) >= n_records: break _validate_file_not_empty(has_data) BooleanSeriesDirectoryFormat = model.SingleFileDirectoryFormat( 'BooleanSeriesDirectoryFormat', 'outliers.tsv', BooleanSeriesFormat) # This is effectively an internal format - it isn't registered with the # plugin, but rather used as part of a dir fmt. This format also exists # in q2-feature-classifier. class PickleFormat(model.BinaryFileFormat): def _validate_(self, level): if not tarfile.is_tarfile(str(self)): raise ValidationError( "Unable to load pickled file (not a tar file).") # https://github.com/qiime2/q2-types/issues/49 # This is effectively an internal format - it isn't registered with the # plugin, but rather used as part of a dir fmt. This format also exists
BooleanSeries = SemanticType('BooleanSeries', variant_of=SampleData.field['type']) class BooleanSeriesFormat(model.TextFileFormat): def sniff(self): with self.open() as fh: line = fh.readline() for line, _ in zip(fh, range(5)): cells = line.strip().split('\t') if len(cells) != 2 or str(cells[1]) not in ('True', 'False'): return False return True BooleanSeriesDirectoryFormat = model.SingleFileDirectoryFormat( 'BooleanSeriesDirectoryFormat', 'outliers.tsv', BooleanSeriesFormat) def _read_dataframe(fh): # Using `dtype=object` and `set_index` to avoid type casting/inference # of any columns or the index. df = pd.read_csv(fh, sep='\t', header=0, dtype='str') df.set_index(df.columns[0], drop=True, append=False, inplace=True) df.index.name = 'id' return df @plugin.register_transformer def _4(data: pd.Series) -> (BooleanSeriesFormat): ff = BooleanSeriesFormat() with ff.open() as fh:
"removal of features below the min-reads " "threshold."), 'reads-hit-reference': ("The number of Deblur reads which recruited to the" " positive reference. IMPORTANT: this is " "assessed after the removal of features below the " "min-reads threshold."), 'unique-reads-missed-reference': ("The number of unique Deblur reads " "which failed to recruit to the positive" " reference. IMPORTANT: this is " "assessed after the removal of features " "below the min-reads threshold."), 'reads-missed-reference': ("The number of Deblur reads which failed to " "recruit to the positive reference. " "IMPORTANT: this is assessed after the " "removal of features below the min-reads " "threshold.") } class DeblurStatsFmt(model.TextFileFormat): def sniff(self): line = open(str(self)).readline() hdr = line.strip().split(',') return hdr == STATS_HEADER DeblurStatsDirFmt = model.SingleFileDirectoryFormat('DeblurStatsDirFmt', 'stats.csv', DeblurStatsFmt)
# ----------------------------------------------------------------------------
# Copyright (c) 2017-2020, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import qiime2.plugin.model as model


class QualityFilterStatsFmt(model.TextFileFormat):
    """CSV statistics file recognised by its exact header row."""

    def sniff(self):
        """Return True if the first line equals the expected CSV header.

        The first line is stripped of surrounding whitespace and split on
        commas; the resulting list must equal the expected column names,
        in the same order.
        """
        expected = [
            'sample-id',
            'total-input-reads',
            'total-retained-reads',
            'reads-truncated',
            'reads-too-short-after-truncation',
            'reads-exceeding-maximum-ambiguous-bases',
        ]
        # Open via a context manager so the handle is closed as soon as the
        # header has been read, instead of being left for the garbage
        # collector.
        with open(str(self)) as fh:
            hdr = fh.readline().strip().split(',')
        return hdr == expected


QualityFilterStatsDirFmt = model.SingleFileDirectoryFormat(
    'QualityFilterStatsDirFmt', 'stats.csv', QualityFilterStatsFmt)
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2019, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import qiime2.plugin.model as model


class MetadataFormat(model.TextFileFormat):
    """Text file format whose sniffer accepts every file."""

    def sniff(self):
        """Always report a match; the file content is not inspected."""
        matched = True
        return matched


# Single-file directory format: 'metadata.tsv' typed as MetadataFormat.
MetadataDirectoryFormat = model.SingleFileDirectoryFormat(
    'MetadataDirectoryFormat',
    'metadata.tsv',
    MetadataFormat,
)
class SingleIntFormat(TextFileFormat): """ Exactly one int on a single line in the file. """ def sniff(self): with self.open() as fh: try: int(fh.readline().rstrip('\n')) except (TypeError, ValueError): return False return True IntSequenceDirectoryFormat = model.SingleFileDirectoryFormat( 'IntSequenceDirectoryFormat', 'ints.txt', IntSequenceFormat) # This could have been a `SingleFileDirectoryFormat`, but isn't for testing # purposes class MappingDirectoryFormat(model.DirectoryFormat): mapping = model.File('mapping.tsv', format=MappingFormat) class FourIntsDirectoryFormat(model.DirectoryFormat): """ A sequence of exactly four integers stored across multiple files, some of which are in a nested directory. Each file contains a single integer. Since this is a sequence, the integers have an order (corresponding to filename) and repetition of elements is allowed.
# TODO indicate tab separated raise ValidationError( "Expected data record to be TSV with two fields, " "detected {0} fields at line {1}:\n\n{2!r}".format( len(cells), line_number, cells)) try: float(cells[1]) except ValueError: raise ValidationError( "Second column must contain only numeric values. " "A non-numeric value ({0!r}) was detected at line " "{1}.".format(cells[1], line_number)) has_data = True if n_records is not None and (line_number - 1) >= n_records: break if not has_data: raise ValidationError( "There must be at least one data record present in the " "file in addition to the header line.") def _validate_(self, level): record_count_map = {'min': 5, 'max': None} self._validate(record_count_map[level]) FirstDifferencesDirectoryFormat = model.SingleFileDirectoryFormat( 'FirstDifferencesDirectoryFormat', 'FirstDifferences.tsv', FirstDifferencesFormat)
reverse = model.File(r'Undetermined_S0_L001_R2_001.fastq.gz', format=FastqGzFormat) barcodes = model.File(r'Undetermined_S0_L001_I1_001.fastq.gz', format=FastqGzFormat) class ErrorCorrectionDetailsFmt(model.TextFileFormat): METADATA_COLUMNS = { 'sample', 'barcode-sequence-id', 'barcode-uncorrected', 'barcode-corrected', 'barcode-errors', } def _validate_(self, level): line = open(str(self)).readline() if len(line.strip()) == 0: raise ValidationError("Failed to locate header.") header = set(line.strip().split('\t')) for column in sorted(self.METADATA_COLUMNS): if column not in header: raise ValidationError(f"{column} is not a column") ErrorCorrectionDetailsDirFmt = model.SingleFileDirectoryFormat( 'ErrorCorrectionDetailsDirFmt', 'details.tsv', ErrorCorrectionDetailsFmt)
from qiime2.plugin import SemanticType, model
from q2_types.sample_data import SampleData

# Semantic type declared as a variant of SampleData's ``type`` field.
StrayPosterior = SemanticType('StrayPosterior',
                              variant_of=SampleData.field['type'])


class StrayPosteriorFormat(model.TextFileFormat):
    """Text file format for ``StrayPosterior`` data; nothing is checked."""

    def validate(self, *args, **kwargs):
        """Accept the file unconditionally.

        Any positional or keyword arguments (for example ``level='min'``)
        are accepted and ignored, so keyword-style calls do not raise
        ``TypeError``.
        """


StrayPosteriorDirFmt = model.SingleFileDirectoryFormat(
    'StrayPosteriorDirFmt', 'posterior.tsv', StrayPosteriorFormat)
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import qiime2.plugin.model as model

from ..plugin_setup import plugin
from q2_types.per_sample_sequences import FastqGzFormat


# Both directory formats below hold multiplexed reads whose barcode is part
# of the read itself, e.g.
#   AGGACTAGGTAGATC => barcode: AGGA ; biological sequence: CTAGGTAGATC

# Single-end layout: one member, 'forward.fastq.gz', typed as FastqGzFormat.
MultiplexedSingleEndBarcodeInSequenceDirFmt = model.SingleFileDirectoryFormat(
    'MultiplexedSingleEndBarcodeInSequenceDirFmt',
    'forward.fastq.gz',
    FastqGzFormat,
)


class MultiplexedPairedEndBarcodeInSequenceDirFmt(model.DirectoryFormat):
    """Paired-end layout with one forward and one reverse FASTQ member."""

    forward_sequences = model.File('forward.fastq.gz', format=FastqGzFormat)
    reverse_sequences = model.File('reverse.fastq.gz', format=FastqGzFormat)


plugin.register_formats(
    MultiplexedSingleEndBarcodeInSequenceDirFmt,
    MultiplexedPairedEndBarcodeInSequenceDirFmt,
)
# ----------------------------------------------------------------------------
# Copyright (c) 2020, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from qiime2.plugin import SemanticType, model

EchoOutput = SemanticType('EchoOutput')


class EchoOutputFmt(model.TextFileFormat):
    """Text file format for ``EchoOutput`` data; nothing is checked."""

    def validate(self, *args, **kwargs):
        """Accept the file unconditionally.

        Any positional or keyword arguments (for example ``level='min'``)
        are accepted and ignored, so keyword-style calls do not raise
        ``TypeError``.
        """


EchoOutputDirFmt = model.SingleFileDirectoryFormat(
    'EchoOutputDirFmt', 'echo.txt', EchoOutputFmt)
return id def _validate_id(self, id): pieces = id.rsplit('_', maxsplit=1) if len(pieces) != 2 or not all(pieces): raise Exception() def _validate_seq(self, seq): if seq: # Will raise a `ValueError` on invalid DNA characters. skbio.DNA(seq, validate=True) else: # Empty sequence. raise Exception() QIIME1DemuxDirFmt = model.SingleFileDirectoryFormat('QIIME1DemuxDirFmt', 'seqs.fna', QIIME1DemuxFormat) plugin.register_formats( FastqManifestFormat, YamlFormat, FastqGzFormat, CasavaOneEightSingleLanePerSampleDirFmt, CasavaOneEightLanelessPerSampleDirFmt, _SingleLanePerSampleFastqDirFmt, SingleLanePerSampleSingleEndFastqDirFmt, SingleLanePerSamplePairedEndFastqDirFmt, SingleEndFastqManifestPhred33, SingleEndFastqManifestPhred64, PairedEndFastqManifestPhred33, PairedEndFastqManifestPhred64, SingleEndFastqManifestPhred33V2, SingleEndFastqManifestPhred64V2, PairedEndFastqManifestPhred33V2, PairedEndFastqManifestPhred64V2, QIIME1DemuxFormat, QIIME1DemuxDirFmt)
from qiime2.plugin import SemanticType, model
from q2_types.sample_data import SampleData

# Semantic type declared as a variant of SampleData's ``type`` field.
SongbirdStats = SemanticType('SongbirdStats',
                             variant_of=SampleData.field['type'])


class SongbirdStatsFormat(model.TextFileFormat):
    """Text file format for ``SongbirdStats`` data; nothing is checked."""

    def validate(self, *args, **kwargs):
        """Accept the file unconditionally.

        Any positional or keyword arguments (for example ``level='min'``)
        are accepted and ignored, so keyword-style calls do not raise
        ``TypeError``.
        """


SongbirdStatsDirFmt = model.SingleFileDirectoryFormat(
    'SongbirdStatsDirFmt', 'stats.tsv', SongbirdStatsFormat)
if line == '': # EOF break elif line.lstrip(' ') == '\n': # Blank line continue else: cells = line.split('\t') if len(cells) < 2: return False count += 1 return False if count == 0 else True TaxonomyDirectoryFormat = model.SingleFileDirectoryFormat( 'TaxonomyDirectoryFormat', 'taxonomy.tsv', TaxonomyFormat) class HeaderlessTSVTaxonomyFormat(TaxonomyFormat): """Format for a 2+ column TSV file without a header. This format supports comment lines starting with #, and blank lines. """ pass HeaderlessTSVTaxonomyDirectoryFormat = model.SingleFileDirectoryFormat( 'HeaderlessTSVTaxonomyDirectoryFormat', 'taxonomy.tsv', HeaderlessTSVTaxonomyFormat)
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2018, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from qiime2.plugin import SemanticType, model
from q2_types.sample_data import SampleData

# Semantic type declared as a variant of SampleData's ``type`` field.
DADA2Stats = SemanticType('DADA2Stats', variant_of=SampleData.field['type'])


class DADA2StatsFormat(model.TextFileFormat):
    """Text file format for ``DADA2Stats`` data; nothing is checked."""

    def validate(self, *args, **kwargs):
        """Accept the file unconditionally.

        Any positional or keyword arguments (for example ``level='min'``)
        are accepted and ignored, so keyword-style calls do not raise
        ``TypeError``.
        """


DADA2StatsDirFmt = model.SingleFileDirectoryFormat(
    'DADA2StatsDirFmt', 'stats.tsv', DADA2StatsFormat)
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2019, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import skbio.io
import qiime2.plugin.model as model

from ..plugin_setup import plugin


class OrdinationFormat(model.TextFileFormat):
    """Text file format whose content check is done by scikit-bio."""

    def sniff(self):
        """Return the first element of the 'ordination' sniffer's result.

        The sniffer is obtained from ``skbio.io.io_registry`` by name and
        called with ``str(self)``.
        """
        registry = skbio.io.io_registry
        result = registry.get_sniffer('ordination')(str(self))
        return result[0]


# Single-file directory format: 'ordination.txt' typed as OrdinationFormat.
OrdinationDirectoryFormat = model.SingleFileDirectoryFormat(
    'OrdinationDirectoryFormat',
    'ordination.txt',
    OrdinationFormat,
)

plugin.register_formats(OrdinationFormat, OrdinationDirectoryFormat)
# ----------------------------------------------------------------------------

import csv

import numpy as np
import qiime2.plugin.model as model
from qiime2.plugin import ValidationError


class UchimeStatsFmt(model.TextFileFormat):
    """Tab-separated statistics file with exactly 18 fields per row."""

    def _check_n_records(self, n):
        """Verify the field count of the rows preceding row index ``n``.

        Rows are read with ``csv.reader`` using a tab delimiter. Checking
        stops when the zero-based row index equals ``n``; every row before
        that must have exactly 18 fields, otherwise ``ValidationError`` is
        raised with the one-based line number.
        """
        with open(str(self)) as handle:
            rows = csv.reader(handle, delimiter='\t')
            for line_number, fields in enumerate(rows, start=1):
                # ``line_number - 1`` is the zero-based row index.
                if line_number - 1 == n:
                    break
                if len(fields) == 18:
                    continue
                raise ValidationError(
                    'Incorrect number of fields detected on line %d.'
                    ' Should be exactly 18.' % line_number)

    def _validate_(self, level):
        """Check 5 rows for level 'min', or every row for level 'max'."""
        # ``np.inf`` never equals a row index, so 'max' scans the whole file.
        limits = {'min': 5, 'max': np.inf}
        row_limit = limits[level]
        self._check_n_records(row_limit)


UchimeStatsDirFmt = model.SingleFileDirectoryFormat(
    'UchimeStatsDirFmt',
    'stats.tsv',
    UchimeStatsFmt,
)
import qiime2 import skbio.io import qiime2.plugin.model as model from qiime2.plugin import ValidationError from ..plugin_setup import plugin class OrdinationFormat(model.TextFileFormat): def sniff(self): sniffer = skbio.io.io_registry.get_sniffer('ordination') return sniffer(str(self))[0] OrdinationDirectoryFormat = model.SingleFileDirectoryFormat( 'OrdinationDirectoryFormat', 'ordination.txt', OrdinationFormat) class ProcrustesStatisticsFmt(model.TextFileFormat): METADATA_COLUMNS = { 'true M^2 value', 'p-value for true M^2 value', 'number of Monte Carlo permutations', } def validate(self, level): try: md = qiime2.Metadata.load(str(self)) except qiime2.metadata.MetadataFileError as md_exc: raise ValidationError(md_exc) from md_exc
elif line.lstrip(' ') == '\n': # Blank line continue elif line.startswith('#'): # Comment line continue else: cells = line.split('\t') if len(cells) < 2: return False count += 1 return False if count == 0 else True TaxonomyDirectoryFormat = model.SingleFileDirectoryFormat( 'TaxonomyDirectoryFormat', 'taxonomy.tsv', TaxonomyFormat) class HeaderlessTSVTaxonomyFormat(TaxonomyFormat): """Format for a 2+ column TSV file without a header. This format supports comment lines starting with #, and blank lines. """ pass HeaderlessTSVTaxonomyDirectoryFormat = model.SingleFileDirectoryFormat( 'HeaderlessTSVTaxonomyDirectoryFormat', 'taxonomy.tsv', HeaderlessTSVTaxonomyFormat)
class NexusFormat(model.TextFileFormat):
    """Text file format with no content validation."""

    def _validate_(self, level):
        """Accept any content; ``level`` is ignored."""
        return None


class BEASTControlFileFormat(model.TextFileFormat):
    """Text file format with no content validation and an md5 helper."""

    def _validate_(self, level):
        """Accept any content; ``level`` is ignored."""
        return None

    def md5sum(self):
        """Return ``qiime2.core.util.md5sum`` applied to this object."""
        # TODO: don't import from here
        from qiime2.core.util import md5sum as _md5sum
        return _md5sum(self)


class BEASTOpsFileFormat(model.TextFileFormat):
    """Text file format with no content validation."""

    def _validate_(self, level):
        """Accept any content; ``level`` is ignored."""
        return None


class BEASTPosteriorDirFmt(model.DirectoryFormat):
    """Directory format with four members, each with its own format."""

    log = model.File('posterior.log', format=PosteriorLogFormat)
    trees = model.File('posterior.trees', format=NexusFormat)
    ops = model.File('posterior.ops', format=BEASTOpsFileFormat)
    control = model.File('control_file.xml', format=BEASTControlFileFormat)


# Single-file directory format: member 'data.nex' typed as NexusFormat.
NexusDirFmt = model.SingleFileDirectoryFormat(
    'NexusDirFmt',
    'data.nex',
    format=NexusFormat,
)