class EMPSingleEndCasavaDirFmt(model.DirectoryFormat):
    """Directory with one sequences file and one barcodes file.

    Both members are declared with ``FastqGzFormat`` and live under fixed
    ``Undetermined_S0_L001_*`` file names.
    """

    # TODO: generalize this with a regex when we have validation in place for
    # model.FileCollections. The file names are currently designed more
    # specifically for handling MiSeq data.
    sequences = model.File(
        r"Undetermined_S0_L001_R1_001.fastq.gz",
        format=FastqGzFormat,
    )
    barcodes = model.File(
        r"Undetermined_S0_L001_I1_001.fastq.gz",
        format=FastqGzFormat,
    )
class EMPPairedEndCasavaDirFmt(model.DirectoryFormat):
    """Directory with forward, reverse and barcodes files.

    All three members are declared with ``FastqGzFormat`` and live under
    fixed ``Undetermined_S0_L001_*`` file names (R1, R2 and I1 respectively).
    """

    forward = model.File(
        r"Undetermined_S0_L001_R1_001.fastq.gz",
        format=FastqGzFormat,
    )
    reverse = model.File(
        r"Undetermined_S0_L001_R2_001.fastq.gz",
        format=FastqGzFormat,
    )
    barcodes = model.File(
        r"Undetermined_S0_L001_I1_001.fastq.gz",
        format=FastqGzFormat,
    )
class RedundantSingleIntDirectoryFormat(model.DirectoryFormat):
    """
    A pair of SingleIntFormat files whose contents must be identical.
    """

    int1 = model.File("file1.txt", format=SingleIntFormat)
    int2 = model.File("file2.txt", format=SingleIntFormat)

    def _validate_(self, level):
        """Raise ``ValidationError`` when the two integer views differ.

        ``level`` is accepted but not consulted by this check.
        """
        first_value = self.int1.view(int)
        second_value = self.int2.view(int)
        values_differ = first_value != second_value
        if values_differ:
            raise ValidationError("file1.txt does not match file2.txt")
class WinnowedDirectoryFormat(model.DirectoryFormat):
    """Fixed three-file layout: feature ordering, AUC and PERMANOVA tables."""

    # This is an example of a fixed layout: it always includes the feature
    # ordering with Jaccard results, plus the complementary metadata files
    # holding AUC values and PERMANOVA values.

    # Feature ordering w/ Jaccard
    featureOrdering = model.File(
        r'feature_ordered.tsv',
        format=WinnowedFeatureOrderingFormat,
    )
    # AUC values with ordering
    auc = model.File(
        r'auc_ordered.tsv',
        format=WinnowedAucOrderingFormat,
    )
    # PERMANOVA values with ordering
    permanova = model.File(
        r'permanova_ordered.tsv',
        format=WinnowedPermanovaOrderingFormat,
    )
class SBMLDirectory(model.DirectoryFormat):
    """Directory with a ``manifest.csv`` and a collection of ``*.xml`` files."""

    manifest = model.File('manifest.csv', format=ModelManifest)
    sbml_files = model.FileCollection(r'.+\.xml', format=SBMLFormat)

    @sbml_files.set_path_maker
    def sbml_path_maker(self, model_id):
        """Return the file name used for ``model_id``: ``<model_id>.xml``."""
        filename = '%s.xml' % model_id
        return filename
class JSONDirectory(model.DirectoryFormat):
    """Directory with a ``manifest.csv`` and a collection of ``*.json`` files."""

    manifest = model.File('manifest.csv', format=ModelManifest)
    json_files = model.FileCollection(r'.+\.json', format=JSONFormat)

    # NOTE(review): the ``sbml_`` prefix looks carried over from an SBML
    # counterpart even though this produces ``.json`` names. The name is kept
    # because it is a public attribute of this class -- confirm with callers
    # before renaming.
    @json_files.set_path_maker
    def sbml_path_maker(self, model_id):
        """Return the file name used for ``model_id``: ``<model_id>.json``."""
        filename = '%s.json' % model_id
        return filename
class CommunityModelDirectory(model.DirectoryFormat):
    """Directory with a ``manifest.csv`` and a collection of ``*.pickle`` files."""

    manifest = model.File('manifest.csv', format=CommunityModelManifest)
    model_files = model.FileCollection(
        r'.+\.pickle',
        format=CommunityModelFormat,
    )

    @model_files.set_path_maker
    def model_path_maker(self, model_id):
        """Return the file name used for ``model_id``: ``<model_id>.pickle``."""
        filename = '%s.pickle' % model_id
        return filename
class SeppReferenceDirFmt(model.DirectoryFormat):
    """Directory with an alignment, a phylogeny and a RAxML info file.

    Validation requires the alignment's index labels and the phylogeny's
    tip names to form the same set of IDs.
    """

    alignment = model.File(
        r"aligned-dna-sequences.fasta",
        format=AlignedDNAFASTAFormat,
    )
    phylogeny = model.File(r"tree.nwk", format=NewickFormat)
    raxml_info = model.File(r"raxml-info.txt", format=RAxMLinfoFormat)

    def _validate_(self, level):
        """Raise ``ValidationError`` if alignment and phylogeny IDs differ.

        ``level`` is accepted but not consulted by this check. The error
        message lists, sorted, the IDs present on one side only.
        """
        msa = self.alignment.view(skbio.TabularMSA)
        tree = self.phylogeny.view(skbio.TreeNode)

        # Re-label the alignment's index from each sequence's 'id' so that
        # the labels can be compared against the tree's tip names.
        msa.reassign_index(minter="id")
        ids_in_alignment = set(msa.index)

        ids_in_phylogeny = set()
        for tip in tree.tips():
            ids_in_phylogeny.add(tip.name)

        if ids_in_alignment == ids_in_phylogeny:
            return

        only_in_alignment = sorted(ids_in_alignment - ids_in_phylogeny)
        only_in_phylogeny = sorted(ids_in_phylogeny - ids_in_alignment)
        raise ValidationError(
            "IDs found in the alignment file that are missing in the "
            "phylogeny file: %s. IDs found in the phylogeny file that are "
            "missing in the alignment file: %s."
            % (only_in_alignment, only_in_phylogeny)
        )
class Bowtie2IndexDirFmt(model.DirectoryFormat):
    """Directory of six Bowtie 2 index files sharing one basename.

    Members are matched by regex on their suffix: ``.1.bt2`` .. ``.4.bt2``
    plus ``.rev.1.bt2`` and ``.rev.2.bt2``. The negative lookbehind on
    ``idx1``/``idx2`` keeps them from also matching the ``.rev.*`` files.
    """

    idx1 = model.File(r'.+(?<!\.rev)\.1\.bt2', format=Bowtie2IndexFileFormat)
    idx2 = model.File(r'.+(?<!\.rev)\.2\.bt2', format=Bowtie2IndexFileFormat)
    ref3 = model.File(r'.+\.3\.bt2', format=Bowtie2IndexFileFormat)
    ref4 = model.File(r'.+\.4\.bt2', format=Bowtie2IndexFileFormat)
    rev1 = model.File(r'.+\.rev\.1\.bt2', format=Bowtie2IndexFileFormat)
    rev2 = model.File(r'.+\.rev\.2\.bt2', format=Bowtie2IndexFileFormat)

    def get_name(self):
        """Return the ``idx1`` path, relative to this directory, up to its
        last ``.1.bt2``.

        NOTE(review): ``idx1`` is declared with a regex pathspec; confirm
        that ``path_maker()`` yields the on-disk file name rather than the
        pattern itself.
        """
        filename = str(self.idx1.path_maker().relative_to(self.path))
        # Split once, from the right: without maxsplit, rsplit() cuts at
        # every occurrence and [0] would truncate a basename that itself
        # contains '.1.bt2'.
        return filename.rsplit('.1.bt2', 1)[0]
class Bowtie2IndexDirFmt(model.DirectoryFormat):
    """Directory of six Bowtie 2 index files sharing one basename.

    Members are matched by regex on their suffix and accept both the
    ``.bt2`` and ``.bt2l`` extensions. The negative lookbehind on
    ``idx1``/``idx2`` keeps them from also matching the ``.rev.*`` files.
    """

    idx1 = model.File(
        r'.+(?<!\.rev)\.1\.bt2l?', format=Bowtie2IndexFileFormat)
    idx2 = model.File(
        r'.+(?<!\.rev)\.2\.bt2l?', format=Bowtie2IndexFileFormat)
    ref3 = model.File(
        r'.+\.3\.bt2l?', format=Bowtie2IndexFileFormat)
    ref4 = model.File(
        r'.+\.4\.bt2l?', format=Bowtie2IndexFileFormat)
    rev1 = model.File(
        r'.+\.rev\.1\.bt2l?', format=Bowtie2IndexFileFormat)
    rev2 = model.File(
        r'.+\.rev\.2\.bt2l?', format=Bowtie2IndexFileFormat)

    def get_basename(self):
        """Return the prefix computed by ``_get_prefix`` minus its last char.

        The prefix is computed over the names of every entry in this
        directory, relative to the directory itself. ``_get_prefix`` is
        defined elsewhere -- presumably it returns the common prefix of the
        given names; verify against its definition.
        """
        relative_names = []
        for entry in self.path.iterdir():
            relative_names.append(str(entry.relative_to(self.path)))
        shared_prefix = _get_prefix(relative_names)
        # trim trailing '.'
        return shared_prefix[:-1]
class NinjaOpsDBDirFmt(model.DirectoryFormat):
    """Database directory: six Bowtie 2 index files, a replicate map and a
    sequences file, all stored under ``db/`` with the ``db`` prefix.
    """

    # NOTE: `db` is used as a placeholder prefix -- NINJA-OPS doesn't care
    # what the prefix is, just that it's constant. The prefix must be used as
    # the enclosing directory name, as well as the prefix of each filename
    # within the directory.
    index1 = model.File("db/db.1.bt2", format=Bowtie2IndexFormat)
    index2 = model.File("db/db.2.bt2", format=Bowtie2IndexFormat)
    index3 = model.File("db/db.3.bt2", format=Bowtie2IndexFormat)
    index4 = model.File("db/db.4.bt2", format=Bowtie2IndexFormat)

    rev_index1 = model.File("db/db.rev.1.bt2", format=Bowtie2IndexFormat)
    rev_index2 = model.File("db/db.rev.2.bt2", format=Bowtie2IndexFormat)

    replicate_map = model.File("db/db.db", format=NinjaReplicateMapFormat)

    # TODO does the name `sequences` make sense or is there something more
    # descriptive?
    sequences = model.File("db/db.tcf", format=TerrificCompressedFormat)

    # TODO is there any additional validation that needs to happen on the
    # directory format that isn't taken care of by the individual FileFormat
    # classes above?
    def _validate_(self, level):
        """Perform no directory-level checks; ``level`` is ignored."""
class MultiplexedPairedEndBarcodeInSequenceDirFmt(model.DirectoryFormat):
    """Directory with ``forward.fastq.gz`` and ``reverse.fastq.gz``.

    Both members are declared with ``FastqGzFormat``.
    """

    forward_sequences = model.File(
        "forward.fastq.gz",
        format=FastqGzFormat,
    )
    reverse_sequences = model.File(
        "reverse.fastq.gz",
        format=FastqGzFormat,
    )
class MicomResultsDirectory(model.DirectoryFormat):
    """Directory with ``growth_rates.csv`` and ``exchange_fluxes.parquet``."""

    growth_rates = model.File(
        'growth_rates.csv',
        format=GrowthRates,
    )
    exchange_fluxes = model.File(
        'exchange_fluxes.parquet',
        format=Fluxes,
    )
class EMPMultiplexedDirFmt(model.DirectoryFormat):
    """Directory with ``sequences.fastq.gz`` and ``barcodes.fastq.gz``.

    Both members are declared with ``FastqGzFormat``.
    """

    sequences = model.File(
        r"sequences.fastq.gz",
        format=FastqGzFormat,
    )
    barcodes = model.File(
        r"barcodes.fastq.gz",
        format=FastqGzFormat,
    )
class EMPPairedEndDirFmt(model.DirectoryFormat):
    """Directory with forward, reverse and barcodes ``.fastq.gz`` files.

    All three members are declared with ``FastqGzFormat``.
    """

    forward = model.File(
        r"forward.fastq.gz",
        format=FastqGzFormat,
    )
    reverse = model.File(
        r"reverse.fastq.gz",
        format=FastqGzFormat,
    )
    barcodes = model.File(
        r"barcodes.fastq.gz",
        format=FastqGzFormat,
    )
class TaxonomicClassifierDirFmt(model.DirectoryFormat):
    """Directory with ``preprocess_params.json`` and ``sklearn_pipeline.tar``."""

    preprocess_params = model.File(
        "preprocess_params.json",
        format=JSONFormat,
    )
    sklearn_pipeline = model.File(
        "sklearn_pipeline.tar",
        format=PickleFormat,
    )
class TaxonomicClassiferTemporaryPickleDirFmt(model.DirectoryFormat):
    """Directory with ``sklearn_version.json`` and ``sklearn_pipeline.tar``."""

    version_info = model.File(
        "sklearn_version.json",
        format=JSONFormat,
    )
    sklearn_pipeline = model.File(
        "sklearn_pipeline.tar",
        format=PickleFormat,
    )
class _SingleLanePerSampleFastqDirFmt(CasavaOneEightSingleLanePerSampleDirFmt):
    """Extends the parent layout with a ``MANIFEST`` and a ``metadata.yml``."""

    manifest = model.File(
        "MANIFEST",
        format=FastqManifestFormat,
    )
    metadata = model.File(
        "metadata.yml",
        format=YamlFormat,
    )
class IDSelectionDirFmt(model.DirectoryFormat):
    """Directory with included/excluded ID lists, metadata and a label file."""

    included = model.File(
        "included.txt",
        format=UNIXListFormat,
    )
    excluded = model.File(
        "excluded.txt",
        format=UNIXListFormat,
    )
    metadata = model.File(
        "metadata.tsv",
        format=IDMetadataFormat,
    )
    label = model.File(
        "label.txt",
        format=UNIXListFormat,
    )
class MicomResultsDirectory(model.DirectoryFormat):
    """Directory with growth rates, exchange fluxes and annotations CSVs."""

    growth_rates = model.File(
        'growth_rates.csv',
        format=GrowthRates,
    )
    exchange_fluxes = model.File(
        'exchange_fluxes.csv',
        format=Fluxes,
    )
    annotations = model.File(
        'annotations.csv',
        format=Annotations,
    )
class MappingDirectoryFormat(model.DirectoryFormat):
    """Directory containing a single ``mapping.tsv`` file."""

    mapping = model.File(
        "mapping.tsv",
        format=MappingFormat,
    )
class BEASTPosteriorDirFmt(model.DirectoryFormat):
    """Directory with posterior log, trees and ops files plus a control file."""

    log = model.File(
        "posterior.log",
        format=PosteriorLogFormat,
    )
    trees = model.File(
        "posterior.trees",
        format=NexusFormat,
    )
    ops = model.File(
        "posterior.ops",
        format=BEASTOpsFileFormat,
    )
    control = model.File(
        "control_file.xml",
        format=BEASTControlFileFormat,
    )
class PairedDNASequencesDirectoryFormat(model.DirectoryFormat):
    """Directory with a left and a right DNA sequences FASTA file.

    Both members are declared with ``DNAFASTAFormat``.
    """

    left_dna_sequences = model.File(
        "left-dna-sequences.fasta",
        format=DNAFASTAFormat,
    )
    right_dna_sequences = model.File(
        "right-dna-sequences.fasta",
        format=DNAFASTAFormat,
    )
class SampleEstimatorDirFmt(model.DirectoryFormat):
    """Directory with ``sklearn_version.json`` and ``sklearn_pipeline.tar``."""

    version_info = model.File(
        "sklearn_version.json",
        format=JSONFormat,
    )
    sklearn_pipeline = model.File(
        "sklearn_pipeline.tar",
        format=PickleFormat,
    )