def sample1_se_tiny_fq(tmpdir_factory):
    """
    Single-end FASTQ file with 1010 reads.

    Fetches ``rnaseq_samples/sample1/sample1.tiny_R1.fastq.gz`` with
    ``_download_file`` into the directory obtained from ``tmpdir_for_func``.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``_download_file`` returns (presumably the local path of the
    downloaded file -- confirm against that helper).
    """
    dest_dir = tmpdir_for_func(tmpdir_factory)
    return _download_file(
        'rnaseq_samples/sample1/sample1.tiny_R1.fastq.gz', dest_dir)
def generic_fixture(key, mapping, factory):
    """
    Run the wrapper test configured under `key` and return its output paths.

    Tries to handle as much of the magic as possible.

    Parameters
    ----------
    key : str
        Key into the module-level `config` dict. The selected entry is read
        for its `wrapper`, `snakefile` and `output` items.

    mapping : dict
        Maps paths from fixtures to input files expected by the snakefile;
        passed to `utils.symlink_in_tempdir`.

    factory
        Handed to `utils.tmpdir_for_func` to obtain the temporary directory
        (presumably pytest's `tmpdir_factory` -- confirm against callers).

    Returns
    -------
    dict
        A copy of the config entry's `output` mapping in which every value
        has been joined onto the temporary directory with `os.path.join`.
        It is built only after `utils.run` has returned, so it is ready to be
        used as a fixture by test functions.
    """
    settings = config[key]
    workdir = utils.tmpdir_for_func(factory)
    link_inputs = utils.symlink_in_tempdir(mapping)

    # No post-run check function is supplied (third argument is None).
    utils.run(
        utils.dpath(settings['wrapper']),
        settings['snakefile'],
        None,
        link_inputs,
        workdir,
    )

    # Work on a copy so the module-level config is left untouched.
    resolved = settings['output'].copy()
    for name in resolved:
        resolved[name] = os.path.join(workdir, resolved[name])
    return resolved
def sample1_pe_tiny_fq(tmpdir_factory):
    """
    Download the R1 and R2 "tiny" FASTQ files for sample1.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    list
        Two items, R1 first and R2 second, each being whatever
        ``_download_file`` returns for that file (presumably its local path
        -- confirm). Both files go into the same directory.
    """
    dest_dir = tmpdir_for_func(tmpdir_factory)
    remote_files = (
        'rnaseq_samples/sample1/sample1.tiny_R1.fastq.gz',
        'rnaseq_samples/sample1/sample1.tiny_R2.fastq.gz',
    )
    return [_download_file(remote, dest_dir) for remote in remote_files]
def bowtie2_indexes(dm6_fa, tmpdir_factory):
    """
    Build a bowtie2 index through the ``bowtie2/build`` wrapper.

    Parameters
    ----------
    dm6_fa
        Location of the input FASTA (the value produced by ``dm6_fa``,
        presumably a path); it is made available to the rule as ``dm6.fa``
        via ``symlink_in_tempdir``.

    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``aligners.bowtie2_index_from_prefix`` returns for the ``dm6``
    prefix inside the temporary directory (presumably the index file paths
    -- confirm).
    """
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule bowtie2:
        input: fasta='dm6.fa'
        output: index=['dm6.1.bt2', 'dm6.2.bt2']
        log: 'bowtie2.log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({dm6_fa: 'dm6.fa'})

    def check():
        # Paths are relative: presumably `run` invokes this callback from
        # inside the working directory -- confirm against `run`.
        # Read through a context manager so the log handle is closed instead
        # of being left for the garbage collector.
        with open('bowtie2.log') as log_file:
            log_lines = log_file.readlines()
        # Fail with a clear assertion rather than an IndexError on `[-1]`.
        assert log_lines, 'bowtie2.log is empty'
        assert 'Total time for backward call to driver' in log_lines[-1]
        assert list(
            shell('bowtie2-inspect dm6 -n', iterable=True)) == ['2L', '2R']

    run(
        dpath('../wrappers/bowtie2/build'),
        snakefile, check, input_data_func, d)
    return aligners.bowtie2_index_from_prefix(os.path.join(d, 'dm6'))
def kallisto_index(tmpdir_factory, transcriptome):
    """
    Build a kallisto index through the ``kallisto/index`` wrapper.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    transcriptome
        Location of the transcriptome FASTA (the value produced by
        ``transcriptome``, presumably a path); it is made available to the
        rule as ``transcriptome.fa`` via ``symlink_in_tempdir``.

    Returns
    -------
    str
        ``transcriptome.idx`` joined onto the temporary directory.
    """
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule kallisto:
        input: fasta='transcriptome.fa'
        output: index='transcriptome.idx'
        log: 'log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir(
        {
            transcriptome: 'transcriptome.fa',
        }
    )

    def check():
        # Paths are relative: presumably `run` invokes this callback from
        # inside the working directory -- confirm against `run`.
        with open('log') as log_file:
            log = log_file.read()
        # The marker must be searched for in the log contents. Asserting the
        # bare string literal is always true and would verify nothing.
        assert '[build] target deBruijn graph' in log

    run(
        dpath('../wrappers/kallisto/index'),
        snakefile, check, input_data_func, d)
    return os.path.join(d, 'transcriptome.idx')
def hisat2_indexes(dm6_fa, tmpdir_factory):
    """
    Build a HISAT2 index through the ``hisat2/build`` wrapper.

    Parameters
    ----------
    dm6_fa
        Location of the input FASTA (the value produced by ``dm6_fa``,
        presumably a path); it is made available to the rule as ``2L.fa``
        via ``symlink_in_tempdir``.

    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``aligners.hisat2_index_from_prefix`` returns for the ``2L``
    prefix inside the temporary directory (presumably the index file paths
    -- confirm).
    """
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule hisat2:
        input: fasta='2L.fa'
        output: index=['2L.1.ht2', '2L.2.ht2']
        log: 'hisat.log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir(
        {
            dm6_fa: '2L.fa'
        }
    )

    def check():
        # Paths are relative: presumably `run` invokes this callback from
        # inside the working directory -- confirm against `run`.
        # Read through a context manager so the log handle is closed instead
        # of being left for the garbage collector.
        with open('hisat.log') as log_file:
            log_lines = log_file.readlines()
        # Fail with a clear assertion rather than an IndexError on `[-1]`.
        assert log_lines, 'hisat.log is empty'
        assert 'Total time for call to driver' in log_lines[-1]
        assert list(
            shell('hisat2-inspect 2L -n', iterable=True)) == ['2L', '2R']

    run(
        dpath('../wrappers/hisat2/build'),
        snakefile, check, input_data_func, d)
    return aligners.hisat2_index_from_prefix(os.path.join(d, '2L'))
def salmon_index(tmpdir_factory, transcriptome):
    """
    Build a salmon index through the ``salmon/index`` wrapper.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    transcriptome
        Location of the transcriptome FASTA (the value produced by
        ``transcriptome``, presumably a path); it is made available to the
        rule as ``transcriptome.fa`` via ``symlink_in_tempdir``.

    Returns
    -------
    str
        ``salmon_index`` joined onto the temporary directory, i.e. the
        directory holding the rule's ``hash.bin`` output.
    """
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule salmon:
        input: fasta='transcriptome.fa'
        output: hash='salmon_index/hash.bin'
        log: 'log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir(
        {
            transcriptome: 'transcriptome.fa',
        }
    )

    def check():
        # Paths are relative: presumably `run` invokes this callback from
        # inside the working directory -- confirm against `run`.
        # Read through a context manager so the log handle is closed instead
        # of being left for the garbage collector.
        with open('log') as log_file:
            log = log_file.read()
        assert '[info] done building index' in log

    run(
        dpath('../wrappers/salmon/index'),
        snakefile, check, input_data_func, d)
    return os.path.join(d, 'salmon_index')
def sample1_se_tiny_bam(tmpdir_factory):
    """
    Download ``sample1.tiny.single.sorted.bam`` for sample1.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``_download_file`` returns (presumably the local path of the
    downloaded BAM -- confirm against that helper).
    """
    dest_dir = tmpdir_for_func(tmpdir_factory)
    return _download_file(
        'rnaseq_samples/sample1/sample1.tiny.single.sorted.bam', dest_dir)
def sample1_pe_bam(tmpdir_factory):
    """
    Download ``sample1.small.paired.sorted.bam`` for sample1.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``_download_file`` returns (presumably the local path of the
    downloaded BAM -- confirm against that helper).
    """
    dest_dir = tmpdir_for_func(tmpdir_factory)
    return _download_file(
        'rnaseq_samples/sample1/sample1.small.paired.sorted.bam', dest_dir)
def annotation_refflat(tmpdir_factory):
    """
    Download the ``annotation/dm6.small.refflat`` annotation file.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``_download_file`` returns (presumably the local path of the
    downloaded file -- confirm against that helper).
    """
    remote_path = 'annotation/dm6.small.refflat'
    return _download_file(remote_path, tmpdir_for_func(tmpdir_factory))
def annotation(tmpdir_factory):
    """
    Download the ``annotation/dm6.small.gtf`` annotation file.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``_download_file`` returns (presumably the local path of the
    downloaded file -- confirm against that helper).
    """
    remote_path = 'annotation/dm6.small.gtf'
    return _download_file(remote_path, tmpdir_for_func(tmpdir_factory))
def dm6_fa(tmpdir_factory):
    """
    Download the ``seq/dm6.small.fa`` FASTA file.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``_download_file`` returns (presumably the local path of the
    downloaded file -- confirm against that helper).
    """
    remote_path = 'seq/dm6.small.fa'
    return _download_file(remote_path, tmpdir_for_func(tmpdir_factory))
def transcriptome(tmpdir_factory):
    """
    Download the ``seq/dm6.small.transcriptome.fa`` FASTA file.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``_download_file`` returns (presumably the local path of the
    downloaded file -- confirm against that helper).
    """
    dest_dir = tmpdir_for_func(tmpdir_factory)
    return _download_file('seq/dm6.small.transcriptome.fa', dest_dir)
def sample1_se_fq(tmpdir_factory):
    """
    Download the ``sample1.small_R1.fastq.gz`` FASTQ file for sample1.

    Parameters
    ----------
    tmpdir_factory
        Forwarded to ``tmpdir_for_func`` (presumably pytest's
        ``tmpdir_factory`` fixture -- confirm).

    Returns
    -------
    Whatever ``_download_file`` returns (presumably the local path of the
    downloaded file -- confirm against that helper).
    """
    dest_dir = tmpdir_for_func(tmpdir_factory)
    return _download_file(
        'rnaseq_samples/sample1/sample1.small_R1.fastq.gz', dest_dir)