Exemple #1
0
class EMPSingleEndCasavaDirFmt(model.DirectoryFormat):
    """Directory layout with one sequences file and one barcodes file.

    Both members are declared as ``FastqGzFormat`` under fixed
    ``Undetermined_S0_L001_*`` file names.
    """

    # TODO: replace the fixed names with a regex once validation exists for
    # model.FileCollections. For now the names are tailored specifically to
    # MiSeq data.
    sequences = model.File(
        r'Undetermined_S0_L001_R1_001.fastq.gz',
        format=FastqGzFormat,
    )

    barcodes = model.File(
        r'Undetermined_S0_L001_I1_001.fastq.gz',
        format=FastqGzFormat,
    )
Exemple #2
0
class EMPPairedEndCasavaDirFmt(model.DirectoryFormat):
    """Directory layout with forward, reverse and barcodes files.

    Every member is declared as ``FastqGzFormat`` under a fixed
    ``Undetermined_S0_L001_*`` file name (R1, R2 and I1 respectively).
    """

    forward = model.File(
        r'Undetermined_S0_L001_R1_001.fastq.gz',
        format=FastqGzFormat,
    )

    reverse = model.File(
        r'Undetermined_S0_L001_R2_001.fastq.gz',
        format=FastqGzFormat,
    )

    barcodes = model.File(
        r'Undetermined_S0_L001_I1_001.fastq.gz',
        format=FastqGzFormat,
    )
Exemple #3
0
class RedundantSingleIntDirectoryFormat(model.DirectoryFormat):
    """Directory format declaring two ``SingleIntFormat`` files.

    ``_validate_`` requires that both files yield the same value when viewed
    as ``int``.
    """

    int1 = model.File('file1.txt', format=SingleIntFormat)
    int2 = model.File('file2.txt', format=SingleIntFormat)

    def _validate_(self, level):
        """Raise ``ValidationError`` when the two files disagree.

        ``level`` is accepted but not consulted by this check.
        """
        first_value = self.int1.view(int)
        second_value = self.int2.view(int)

        if first_value != second_value:
            raise ValidationError("file1.txt does not match file2.txt")
Exemple #4
0
class WinnowedDirectoryFormat(model.DirectoryFormat):
    """Fixed three-file layout for winnowed results.

    The layout always carries the feature ordering (with Jaccard results)
    together with the complementary AUC and PERMANOVA files.
    """

    # Feature ordering w/ Jaccard
    featureOrdering = model.File(r"feature_ordered.tsv",
                                 format=WinnowedFeatureOrderingFormat)

    # AUC values with ordering
    auc = model.File(r"auc_ordered.tsv",
                     format=WinnowedAucOrderingFormat)

    # PERMANOVA values with ordering
    permanova = model.File(r"permanova_ordered.tsv",
                           format=WinnowedPermanovaOrderingFormat)
class SBMLDirectory(model.DirectoryFormat):
    """Directory format with a manifest plus a collection of ``.xml`` files.

    ``manifest.csv`` is declared as ``ModelManifest``; every path matching
    ``.+\\.xml`` is declared as ``SBMLFormat``.
    """

    manifest = model.File(
        "manifest.csv",
        format=ModelManifest,
    )
    sbml_files = model.FileCollection(
        r".+\.xml",
        format=SBMLFormat,
    )

    @sbml_files.set_path_maker
    def sbml_path_maker(self, model_id):
        """Return the file name used for ``model_id``: ``<model_id>.xml``."""
        filename = "%s.xml" % model_id
        return filename
class JSONDirectory(model.DirectoryFormat):
    """Directory format with a manifest plus a collection of ``.json`` files.

    ``manifest.csv`` is declared as ``ModelManifest``; every path matching
    ``.+\\.json`` is declared as ``JSONFormat``.
    """

    manifest = model.File(
        "manifest.csv",
        format=ModelManifest,
    )
    json_files = model.FileCollection(
        r".+\.json",
        format=JSONFormat,
    )

    # NOTE(review): the hook is called ``sbml_path_maker`` although it builds
    # ``.json`` names -- presumably a leftover from the SBML variant. The name
    # is kept so that any existing reference to it keeps working.
    @json_files.set_path_maker
    def sbml_path_maker(self, model_id):
        """Return the file name used for ``model_id``: ``<model_id>.json``."""
        filename = "%s.json" % model_id
        return filename
class CommunityModelDirectory(model.DirectoryFormat):
    """Directory format with a manifest plus a collection of pickle files.

    ``manifest.csv`` is declared as ``CommunityModelManifest``; every path
    matching ``.+\\.pickle`` is declared as ``CommunityModelFormat``.
    """

    manifest = model.File(
        "manifest.csv",
        format=CommunityModelManifest,
    )
    model_files = model.FileCollection(
        r".+\.pickle",
        format=CommunityModelFormat,
    )

    @model_files.set_path_maker
    def model_path_maker(self, model_id):
        """Return the file name used for ``model_id``: ``<model_id>.pickle``."""
        filename = "%s.pickle" % model_id
        return filename
class SeppReferenceDirFmt(model.DirectoryFormat):
    """Directory format bundling an alignment, a tree and a RAxML info file.

    ``_validate_`` additionally checks that the alignment and the tree tips
    carry exactly the same set of IDs.
    """

    alignment = model.File(
        r'aligned-dna-sequences.fasta',
        format=AlignedDNAFASTAFormat,
    )
    phylogeny = model.File(r'tree.nwk', format=NewickFormat)
    raxml_info = model.File(r'raxml-info.txt', format=RAxMLinfoFormat)

    def _validate_(self, level):
        """Raise ``ValidationError`` if alignment IDs and tip names differ.

        ``level`` is accepted but not consulted by this check.
        """
        msa = self.alignment.view(skbio.TabularMSA)
        root = self.phylogeny.view(skbio.TreeNode)

        # Re-key the alignment by the 'id' minter so that its index can be
        # compared against the tip names of the tree.
        msa.reassign_index(minter='id')
        alignment_ids = set(msa.index)
        phylogeny_ids = {tip.name for tip in root.tips()}

        if alignment_ids == phylogeny_ids:
            return

        only_in_alignment = sorted(alignment_ids - phylogeny_ids)
        only_in_phylogeny = sorted(phylogeny_ids - alignment_ids)
        raise ValidationError(
            'IDs found in the alignment file that are '
            'missing in the phylogeny file: %s. IDs '
            'found in the phylogeny file that are '
            'missing in the alignment file: %s.'
            % (only_in_alignment, only_in_phylogeny))
Exemple #9
0
class Bowtie2IndexDirFmt(model.DirectoryFormat):
    """Directory format declaring the six ``.bt2`` files of a Bowtie2 index.

    The forward patterns (``idx1``/``idx2``) use a negative look-behind so
    that they do not also match the ``.rev.1.bt2`` / ``.rev.2.bt2`` files.
    """

    idx1 = model.File(r'.+(?<!\.rev)\.1\.bt2', format=Bowtie2IndexFileFormat)
    idx2 = model.File(r'.+(?<!\.rev)\.2\.bt2', format=Bowtie2IndexFileFormat)
    ref3 = model.File(r'.+\.3\.bt2', format=Bowtie2IndexFileFormat)
    ref4 = model.File(r'.+\.4\.bt2', format=Bowtie2IndexFileFormat)
    rev1 = model.File(r'.+\.rev\.1\.bt2', format=Bowtie2IndexFileFormat)
    rev2 = model.File(r'.+\.rev\.2\.bt2', format=Bowtie2IndexFileFormat)

    def get_name(self):
        """Return the ``idx1`` path, relative to this directory, without its
        trailing ``.1.bt2`` suffix.

        NOTE(review): ``idx1`` is declared with a regex pathspec; confirm
        that ``path_maker()`` resolves to the actual on-disk file name rather
        than to the pattern text.
        """
        filename = str(self.idx1.path_maker().relative_to(self.path))
        # maxsplit=1 strips only the final '.1.bt2'. Without it, rsplit acts
        # like split and a name that contains '.1.bt2' earlier on would be
        # truncated at the first occurrence.
        return filename.rsplit('.1.bt2', 1)[0]
Exemple #10
0
class Bowtie2IndexDirFmt(model.DirectoryFormat):
    """Directory format declaring the six files of a Bowtie2 index.

    Each pattern accepts either a ``.bt2`` or a ``.bt2l`` extension. The
    forward patterns (``idx1``/``idx2``) use a negative look-behind so that
    they do not also match the ``.rev.*`` files.
    """

    idx1 = model.File(r'.+(?<!\.rev)\.1\.bt2l?', format=Bowtie2IndexFileFormat)
    idx2 = model.File(r'.+(?<!\.rev)\.2\.bt2l?', format=Bowtie2IndexFileFormat)
    ref3 = model.File(r'.+\.3\.bt2l?', format=Bowtie2IndexFileFormat)
    ref4 = model.File(r'.+\.4\.bt2l?', format=Bowtie2IndexFileFormat)
    rev1 = model.File(r'.+\.rev\.1\.bt2l?', format=Bowtie2IndexFileFormat)
    rev2 = model.File(r'.+\.rev\.2\.bt2l?', format=Bowtie2IndexFileFormat)

    def get_basename(self):
        """Return the prefix computed by ``_get_prefix`` over the directory's
        entries, minus its last character."""
        root = self.path
        relative_names = []
        for entry in root.iterdir():
            relative_names.append(str(entry.relative_to(root)))

        shared_prefix = _get_prefix(relative_names)
        # Drop the final character (the trailing '.').
        return shared_prefix[:-1]
Exemple #11
0
class NinjaOpsDBDirFmt(model.DirectoryFormat):
    """Directory format for a NINJA-OPS database stored under ``db/``.

    Declares six Bowtie2 index files, a replicate map and a compressed
    sequences file, all sharing the ``db`` prefix.
    """

    # NOTE: `db` is only a placeholder prefix. NINJA-OPS does not care which
    # prefix is used as long as it is constant: it must name the enclosing
    # directory and also prefix every file name inside that directory.
    index1 = model.File(
        'db/db.1.bt2', format=Bowtie2IndexFormat)
    index2 = model.File(
        'db/db.2.bt2', format=Bowtie2IndexFormat)
    index3 = model.File(
        'db/db.3.bt2', format=Bowtie2IndexFormat)
    index4 = model.File(
        'db/db.4.bt2', format=Bowtie2IndexFormat)
    rev_index1 = model.File(
        'db/db.rev.1.bt2', format=Bowtie2IndexFormat)
    rev_index2 = model.File(
        'db/db.rev.2.bt2', format=Bowtie2IndexFormat)

    replicate_map = model.File(
        'db/db.db', format=NinjaReplicateMapFormat)

    # TODO: is `sequences` a sensible name, or is there something more
    # descriptive?
    sequences = model.File(
        'db/db.tcf', format=TerrificCompressedFormat)

    # TODO: does the directory format need any validation beyond what the
    # individual FileFormat classes above already provide?
    def _validate_(self, level):
        """Perform no directory-level checks; ``level`` is ignored."""
Exemple #12
0
class MultiplexedPairedEndBarcodeInSequenceDirFmt(model.DirectoryFormat):
    """Directory format with a forward and a reverse ``FastqGzFormat`` file."""

    forward_sequences = model.File(
        'forward.fastq.gz',
        format=FastqGzFormat,
    )
    reverse_sequences = model.File(
        'reverse.fastq.gz',
        format=FastqGzFormat,
    )
class MicomResultsDirectory(model.DirectoryFormat):
    """Directory format with a growth-rates CSV and an exchange-fluxes
    parquet file."""

    growth_rates = model.File(
        "growth_rates.csv",
        format=GrowthRates,
    )
    exchange_fluxes = model.File(
        "exchange_fluxes.parquet",
        format=Fluxes,
    )
Exemple #14
0
class EMPMultiplexedDirFmt(model.DirectoryFormat):
    """Directory format with a sequences file and a barcodes file, both
    declared as ``FastqGzFormat``."""

    sequences = model.File(
        r'sequences.fastq.gz',
        format=FastqGzFormat,
    )

    barcodes = model.File(
        r'barcodes.fastq.gz',
        format=FastqGzFormat,
    )
Exemple #15
0
class EMPPairedEndDirFmt(model.DirectoryFormat):
    """Directory format with forward, reverse and barcodes files, all
    declared as ``FastqGzFormat``."""

    forward = model.File(
        r'forward.fastq.gz',
        format=FastqGzFormat,
    )

    reverse = model.File(
        r'reverse.fastq.gz',
        format=FastqGzFormat,
    )

    barcodes = model.File(
        r'barcodes.fastq.gz',
        format=FastqGzFormat,
    )
Exemple #16
0
class TaxonomicClassifierDirFmt(model.DirectoryFormat):
    """Directory format pairing a JSON parameters file with a pipeline
    archive declared as ``PickleFormat``."""

    preprocess_params = model.File(
        'preprocess_params.json',
        format=JSONFormat,
    )
    sklearn_pipeline = model.File(
        'sklearn_pipeline.tar',
        format=PickleFormat,
    )
Exemple #17
0
class TaxonomicClassiferTemporaryPickleDirFmt(model.DirectoryFormat):
    """Directory format pairing a JSON version file with a pipeline archive
    declared as ``PickleFormat``."""

    version_info = model.File(
        'sklearn_version.json',
        format=JSONFormat,
    )
    sklearn_pipeline = model.File(
        'sklearn_pipeline.tar',
        format=PickleFormat,
    )
Exemple #18
0
class _SingleLanePerSampleFastqDirFmt(CasavaOneEightSingleLanePerSampleDirFmt):
    """Extension of ``CasavaOneEightSingleLanePerSampleDirFmt`` that also
    declares a ``MANIFEST`` file and a ``metadata.yml`` file."""

    manifest = model.File(
        'MANIFEST',
        format=FastqManifestFormat,
    )
    metadata = model.File(
        'metadata.yml',
        format=YamlFormat,
    )
Exemple #19
0
class IDSelectionDirFmt(model.DirectoryFormat):
    """Directory format with included/excluded/label list files and a
    metadata TSV.

    The three ``.txt`` members are declared as ``UNIXListFormat``; the
    metadata file is declared as ``IDMetadataFormat``.
    """

    included = model.File(
        'included.txt', format=UNIXListFormat)
    excluded = model.File(
        'excluded.txt', format=UNIXListFormat)
    metadata = model.File(
        'metadata.tsv', format=IDMetadataFormat)
    label = model.File(
        'label.txt', format=UNIXListFormat)
class MicomResultsDirectory(model.DirectoryFormat):
    """Directory format with growth-rates, exchange-fluxes and annotations
    CSV files."""

    growth_rates = model.File(
        "growth_rates.csv",
        format=GrowthRates,
    )
    exchange_fluxes = model.File(
        "exchange_fluxes.csv",
        format=Fluxes,
    )
    annotations = model.File(
        "annotations.csv",
        format=Annotations,
    )
Exemple #21
0
class MappingDirectoryFormat(model.DirectoryFormat):
    """Directory format holding a single ``mapping.tsv`` file declared as
    ``MappingFormat``."""

    mapping = model.File(
        'mapping.tsv',
        format=MappingFormat,
    )
Exemple #22
0
class BEASTPosteriorDirFmt(model.DirectoryFormat):
    """Directory format with the posterior log, trees and ops files plus
    the XML control file."""

    log = model.File(
        'posterior.log', format=PosteriorLogFormat)
    trees = model.File(
        'posterior.trees', format=NexusFormat)
    ops = model.File(
        'posterior.ops', format=BEASTOpsFileFormat)
    control = model.File(
        'control_file.xml', format=BEASTControlFileFormat)
Exemple #23
0
class PairedDNASequencesDirectoryFormat(model.DirectoryFormat):
    """Directory format with a left and a right FASTA file, both declared
    as ``DNAFASTAFormat``."""

    left_dna_sequences = model.File(
        'left-dna-sequences.fasta',
        format=DNAFASTAFormat,
    )
    right_dna_sequences = model.File(
        'right-dna-sequences.fasta',
        format=DNAFASTAFormat,
    )
class SampleEstimatorDirFmt(model.DirectoryFormat):
    """Directory format pairing a JSON version file with a pipeline archive
    declared as ``PickleFormat``."""

    version_info = model.File(
        'sklearn_version.json',
        format=JSONFormat,
    )
    sklearn_pipeline = model.File(
        'sklearn_pipeline.tar',
        format=PickleFormat,
    )