class StaphAureusN315Recipe(BaseRecipe):
    '''
    Recipe for the staph aureus N315 genome (NC_002745.2 per the source
    URL) together with its bt2 index.
    '''

    def __init__(self):
        super(StaphAureusN315Recipe, self).__init__()
        # The third SourceFile argument is a URL; presumably the location
        # the fasta can be fetched from -- confirm against SourceFile.
        self.source = SourceFile(
            self.repo,
            "staph_aureus_n315.fna.gz",
            "https://www.ncbi.nlm.nih.gov/nuccore/NC_002745.2?report=fasta&log$=seqview&format=text")
        self.bt2 = ConstructedFile(self.repo, "staph_aureus_n315.bt2")

    def name(self):
        '''Return the identifier of this recipe.'''
        return 'staph_aureus_n315'

    def fileTypes(self):
        '''Return the names of the file types this recipe uses.'''
        return ['gz_fasta_nucl', 'bt2_index']

    def resultSchema(self):
        '''Return the mapping from result key to file type(s).'''
        return {
            'fasta': 'gz_fasta_nucl',
            'bt2': ['bt2_index'] * 6
        }

    def makeRecipe(self):
        '''
        Resolve the fasta and the bt2 index and save both to the repo.

        Every path starting with the bt2 filepath is saved under 'bt2'.
        '''
        self.source.resolve()
        self.repo.saveFiles(self, 'fasta', self.source.filepath())
        self.bt2.resolve()
        # glob() yields matches in arbitrary order; sort them so the
        # index files are always saved in the same, reproducible order.
        bt2Indices = sorted(glob(self.bt2.filepath() + '*'))
        self.repo.saveFiles(self, 'bt2', *bt2Indices)
class CARDRecipe(BaseRecipe):
    '''
    Recipe for CARD, the comprehensive antibiotic resistance database.

    Tracks one source fasta and two constructed files: a shortbred
    marker fasta and a diamond database.
    '''

    def __init__(self):
        super(CARDRecipe, self).__init__()
        self.source = SourceFile(self.repo, "card.faa.gz")
        self.sbred = ConstructedFile(
            self.repo, "card.shortbred_markers.faa")
        self.dmnd = ConstructedFile(self.repo, "card.dmnd")

    def name(self):
        '''Return the identifier of this recipe.'''
        return 'card'

    def fileTypes(self):
        '''Return the names of the file types this recipe uses.'''
        return ['gz_fasta_aa', 'fasta_aa', 'dmnd-db']

    def resultSchema(self):
        '''Return the mapping from result key to file type.'''
        schema = {}
        schema['fasta'] = 'gz_fasta_aa'
        schema['sbred'] = 'fasta_aa'
        schema['dmnd'] = 'dmnd-db'
        return schema

    def makeRecipe(self):
        '''Resolve each file in turn and save it under its result key.'''
        steps = (
            ('fasta', self.source),
            ('sbred', self.sbred),
            ('dmnd', self.dmnd),
        )
        for key, tracked in steps:
            tracked.resolve()
            self.repo.saveFiles(self, key, tracked.filepath())
class MiniKrakenRecipe(BaseRecipe):
    '''
    Recipe for the minikraken database.

    The source is treated as a directory; four files inside it are
    saved together under the 'kraken-db' key.
    '''

    def __init__(self):
        super(MiniKrakenRecipe, self).__init__()
        self.source = SourceFile(self.repo, "minikraken-db")

    def name(self):
        '''Return the identifier of this recipe.'''
        return 'minikraken'

    def fileTypes(self):
        '''Return the names of the file types this recipe uses.'''
        return ['kraken-db']

    def resultSchema(self):
        '''Return the mapping from result key to file types.'''
        return {'kraken-db': ['kraken-db'] * 4}

    def makeRecipe(self):
        '''Resolve the source and save the four database member files.'''
        self.source.resolve()
        dbRoot = self.source.filepath()
        taxonomyDir = os.path.join(dbRoot, 'taxonomy')
        # Order matters: index, kdb, taxonomy names, taxonomy nodes.
        members = [
            os.path.join(dbRoot, 'database.idx'),
            os.path.join(dbRoot, 'database.kdb'),
            os.path.join(taxonomyDir, 'names.dmp'),
            os.path.join(taxonomyDir, 'nodes.dmp'),
        ]
        self.repo.saveFiles(self, 'kraken-db', *members)
def __init__(self):
    '''
    Create the four GOTTCHA source file handles: species and strain
    level files for both bacteria and viruses.
    '''
    super(GOTTCHARecipe, self).__init__()
    repo = self.repo
    self.bact_species = SourceFile(
        repo, "GOTTCHA_BACTERIA_c4937_k24_u30.species")
    self.bact_strains = SourceFile(
        repo, "GOTTCHA_BACTERIA_c4937_k24_u30.strain")
    self.virus_species = SourceFile(
        repo, "GOTTCHA_VIRUSES_c5900_k24_u30.species")
    self.virus_strains = SourceFile(
        repo, "GOTTCHA_VIRUSES_c5900_k24_u30.strain")
class HG38UCSCGenomeRecipe(BaseRecipe):
    '''
    Recipe for the hg38 genome from UCSC.

    This genome is used in the CAP because:
     - hg38 is the most recent as of December 2017
     - it includes alt contigs. Bad for variant calling, good for
       filtering
     - it uses human readable chromosome names
     - it uses the rCRS mitochondrial sequence
    '''

    def __init__(self):
        super(HG38UCSCGenomeRecipe, self).__init__()
        # The third SourceFile argument is a URL; presumably the location
        # the fasta can be fetched from -- confirm against SourceFile.
        self.source = SourceFile(
            self.repo,
            "hg38_ucsc.fna.gz",
            "http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz")
        self.bt2 = ConstructedFile(self.repo, "hg38_ucsc.bt2")

    def name(self):
        '''Return the identifier of this recipe.'''
        return 'hg38_ucsc'

    def fileTypes(self):
        '''Return the names of the file types this recipe uses.'''
        return ['gz_fasta_nucl', 'bt2_index']

    def resultSchema(self):
        '''Return the mapping from result key to file type(s).'''
        return {
            'fasta': 'gz_fasta_nucl',
            'bt2': ['bt2_index'] * 6
        }

    def makeRecipe(self):
        '''
        Resolve the fasta and the bt2 index and save both to the repo.

        Every path starting with the bt2 filepath is saved under 'bt2'.
        '''
        self.source.resolve()
        self.repo.saveFiles(self, 'fasta', self.source.filepath())
        self.bt2.resolve()
        # glob() yields matches in arbitrary order; sort them so the
        # index files are always saved in the same, reproducible order.
        bt2Indices = sorted(glob(self.bt2.filepath() + '*'))
        self.repo.saveFiles(self, 'bt2', *bt2Indices)
class NRProteinRecipe(BaseRecipe):
    '''
    Recipe for the nr protein fasta with a diamond index
    '''

    def __init__(self):
        super(NRProteinRecipe, self).__init__()
        self.fasta = SourceFile(self.repo, "nr.faa.gz")
        self.dmnd = ConstructedFile(self.repo, "nr.dmnd")

    def name(self):
        '''Return the identifier of this recipe.'''
        return 'nr_protein'

    def fileTypes(self):
        '''Return the names of the file types this recipe uses.'''
        return ['gz_fasta_aa', 'dmnd-db']

    def resultSchema(self):
        '''Return the mapping from result key to file type.'''
        schema = {}
        schema['fasta'] = 'gz_fasta_aa'
        schema['dmnd'] = 'dmnd-db'
        return schema

    def makeRecipe(self):
        '''Resolve the fasta, then the diamond db, saving each in turn.'''
        for key, tracked in (('fasta', self.fasta), ('dmnd', self.dmnd)):
            tracked.resolve()
            self.repo.saveFiles(self, key, tracked.filepath())
class MeganRecipe(BaseRecipe):
    '''
    Recipe for the megan files: the protein accession to taxonomy
    table and the constructed blast2lca file
    '''

    def __init__(self):
        super(MeganRecipe, self).__init__()
        self.acc2taxa = SourceFile(
            self.repo, "prot_acc2tax-May2017.abin")
        self.blast2lca = ConstructedFile(self.repo, "blast2lca")

    def name(self):
        '''Return the identifier of this recipe.'''
        return 'megan'

    def fileTypes(self):
        '''Return the names of the file types this recipe uses.'''
        return ['megan_table']

    def resultSchema(self):
        '''Return the mapping from result key to file type.'''
        schema = {}
        schema['acc2taxa'] = 'megan_table'
        schema['blast2lca'] = 'megan_table'
        return schema

    def makeRecipe(self):
        '''Resolve acc2taxa, then blast2lca, saving each in turn.'''
        steps = (
            ('acc2taxa', self.acc2taxa),
            ('blast2lca', self.blast2lca),
        )
        for key, tracked in steps:
            tracked.resolve()
            self.repo.saveFiles(self, key, tracked.filepath())
class CommonMacrobialRecipe(BaseRecipe):
    '''
    Recipe for database of common macrobial genomes
    '''

    def __init__(self):
        super(CommonMacrobialRecipe, self).__init__()
        self.source = SourceFile(self.repo, "macrobes.fna.gz")
        self.bt2 = ConstructedFile(self.repo, "common_macrobial.bt2")

    def name(self):
        '''Return the identifier of this recipe.'''
        return 'common_macrobial'

    def fileTypes(self):
        '''Return the names of the file types this recipe uses.'''
        return ['gz_fasta_nucl', 'bt2_index']

    def resultSchema(self):
        '''Return the mapping from result key to file type(s).'''
        return {'fasta': 'gz_fasta_nucl', 'bt2': ['bt2_index'] * 6}

    def makeRecipe(self):
        '''
        Resolve the fasta and the bt2 index and save both to the repo.

        Every path starting with the bt2 filepath is saved under 'bt2'.
        '''
        self.source.resolve()
        self.repo.saveFiles(self, 'fasta', self.source.filepath())
        self.bt2.resolve()
        # glob() yields matches in arbitrary order; sort them so the
        # index files are always saved in the same, reproducible order.
        bt2Indices = sorted(glob(self.bt2.filepath() + '*'))
        self.repo.saveFiles(self, 'bt2', *bt2Indices)
class MethyltransferaseRecipe(BaseRecipe):
    '''
    Recipe for the methyltransferase protein fasta with a diamond index
    '''

    def __init__(self):
        super(MethyltransferaseRecipe, self).__init__()
        self.fasta = SourceFile(self.repo, "methyls.faa")
        self.dmnd = ConstructedFile(self.repo, "methyls.dmnd")

    def name(self):
        '''Return the identifier of this recipe.'''
        return 'methyl'

    def fileTypes(self):
        '''Return the names of the file types this recipe uses.'''
        # NOTE(review): the source file is "methyls.faa" (no .gz suffix)
        # yet it is declared as 'gz_fasta_aa' -- confirm this is intended.
        return ['gz_fasta_aa', 'dmnd-db']

    def resultSchema(self):
        '''Return the mapping from result key to file type.'''
        schema = {}
        schema['fasta'] = 'gz_fasta_aa'
        schema['dmnd'] = 'dmnd-db'
        return schema

    def makeRecipe(self):
        '''Resolve the fasta, then the diamond db, saving each in turn.'''
        for key, tracked in (('fasta', self.fasta), ('dmnd', self.dmnd)):
            tracked.resolve()
            self.repo.saveFiles(self, key, tracked.filepath())
class MegaresRecipe(BaseRecipe):
    '''
    Recipe for the megares fasta, its csv file and its bt2 index
    '''

    def __init__(self):
        super(MegaresRecipe, self).__init__()
        self.fasta = SourceFile(self.repo, "megares.fa")
        self.bt2 = ConstructedFile(self.repo, "megares.bt2")
        self.csv = SourceFile(self.repo, "megares.csv")

    def name(self):
        '''Return the identifier of this recipe.'''
        return 'megares'

    def fileTypes(self):
        '''Return the names of the file types this recipe uses.'''
        return ['fasta_nucl', 'bt2_index', 'csv']

    def resultSchema(self):
        '''Return the mapping from result key to file type(s).'''
        return {'fasta': 'fasta_nucl',
                'bt2': ['bt2_index'] * 6,
                'csv': 'csv'}

    def makeRecipe(self):
        '''
        Resolve the fasta, the csv and the bt2 index, saving each to
        the repo.

        Every path starting with the bt2 filepath is saved under 'bt2'.
        '''
        self.fasta.resolve()
        self.repo.saveFiles(self, 'fasta', self.fasta.filepath())
        self.csv.resolve()
        self.repo.saveFiles(self, 'csv', self.csv.filepath())
        self.bt2.resolve()
        # glob() yields matches in arbitrary order; sort them so the
        # index files are always saved in the same, reproducible order.
        bt2Indices = sorted(glob(self.bt2.filepath() + '*'))
        self.repo.saveFiles(self, 'bt2', *bt2Indices)
def __init__(self):
    '''Create the source file handles for the nt and p+h+v files.'''
    super(CentrifugeRecipe, self).__init__()
    repo = self.repo
    self.nt = SourceFile(repo, "nt.1.cf")
    self.phv = SourceFile(repo, "p+h+v.1.cf")
def __init__(self):
    '''Create the hg38 fasta source handle and its bt2 file handle.'''
    super(HG38UCSCGenomeRecipe, self).__init__()
    repo = self.repo
    # The third SourceFile argument is a URL; presumably the location
    # the fasta can be fetched from -- confirm against SourceFile.
    self.source = SourceFile(
        repo,
        "hg38_ucsc.fna.gz",
        "http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz")
    self.bt2 = ConstructedFile(repo, "hg38_ucsc.bt2")
def __init__(self):
    '''Create the vfdb fasta source handle and its dmnd file handle.'''
    super(VFDBRecipe, self).__init__()
    repo = self.repo
    self.fasta = SourceFile(repo, "vfdb.faa")
    self.dmnd = ConstructedFile(repo, "vfdb.dmnd")
def __init__(self):
    '''Create the source file handles for the refseq-db and nt sources.'''
    super(KrakenHLLRecipe, self).__init__()
    repo = self.repo
    self.refseq = SourceFile(repo, "refseq-db")
    self.nt = SourceFile(repo, "nt")
def __init__(self):
    '''Create the nr fasta source handle and its dmnd file handle.'''
    super(NRProteinRecipe, self).__init__()
    repo = self.repo
    self.fasta = SourceFile(repo, "nr.faa.gz")
    self.dmnd = ConstructedFile(repo, "nr.dmnd")
def __init__(self):
    '''Create the staph aureus N315 fasta handle and its bt2 handle.'''
    super(StaphAureusN315Recipe, self).__init__()
    repo = self.repo
    # The third SourceFile argument is a URL; presumably the location
    # the fasta can be fetched from -- confirm against SourceFile.
    self.source = SourceFile(
        repo,
        "staph_aureus_n315.fna.gz",
        "https://www.ncbi.nlm.nih.gov/nuccore/NC_002745.2?report=fasta&log$=seqview&format=text")
    self.bt2 = ConstructedFile(repo, "staph_aureus_n315.bt2")
def __init__(self):
    '''Create the macrobes fasta source handle and its bt2 file handle.'''
    super(CommonMacrobialRecipe, self).__init__()
    repo = self.repo
    self.source = SourceFile(repo, "macrobes.fna.gz")
    self.bt2 = ConstructedFile(repo, "common_macrobial.bt2")
def __init__(self):
    '''Create the source file handle for the minikraken database.'''
    super(MiniKrakenRecipe, self).__init__()
    repo = self.repo
    self.source = SourceFile(repo, "minikraken-db")
def __init__(self):
    '''Create the megares fasta, bt2 and csv file handles.'''
    super(MegaresRecipe, self).__init__()
    repo = self.repo
    self.fasta = SourceFile(repo, "megares.fa")
    self.bt2 = ConstructedFile(repo, "megares.bt2")
    self.csv = SourceFile(repo, "megares.csv")
def __init__(self):
    '''Create the card fasta, shortbred marker and dmnd file handles.'''
    super(CARDRecipe, self).__init__()
    repo = self.repo
    self.source = SourceFile(repo, "card.faa.gz")
    self.sbred = ConstructedFile(repo, "card.shortbred_markers.faa")
    self.dmnd = ConstructedFile(repo, "card.dmnd")
def __init__(self):
    '''Create the acc2taxa source handle and the blast2lca file handle.'''
    super(MeganRecipe, self).__init__()
    repo = self.repo
    self.acc2taxa = SourceFile(repo, "prot_acc2tax-May2017.abin")
    self.blast2lca = ConstructedFile(repo, "blast2lca")
def __init__(self):
    '''Create the methyls fasta source handle and its dmnd file handle.'''
    super(MethyltransferaseRecipe, self).__init__()
    repo = self.repo
    self.fasta = SourceFile(repo, "methyls.faa")
    self.dmnd = ConstructedFile(repo, "methyls.dmnd")