def test_salmon_quant_single_index(tmpdir, sample1_se_tiny_fq, salmon_index):
    """Quantify single-end reads with the salmon/quant wrapper against a
    prebuilt index and verify the quant.sf header line."""
    snakefile = '''
    rule salmon_quant:
        input:
            unmatedReads='sample1.fq.gz',
            index='idx/hash.bin'
        output: 'sample1/salmon/quant.sf'
        params: extra='--libType A'
        log: 'salmon.quant.log'
        wrapper: 'file:wrapper'
    '''
    mapping = {
        sample1_se_tiny_fq: 'sample1.fq.gz',
        salmon_index: 'idx',
    }
    input_data_func = symlink_in_tempdir(mapping)

    def check():
        # Salmon's quant.sf starts with a fixed tab-separated header.
        expected = 'Name\tLength\tEffectiveLength\tTPM\tNumReads\n'
        with open('sample1/salmon/quant.sf') as fh:
            assert fh.readline() == expected

    run(dpath('../wrappers/salmon/quant'), snakefile, check, input_data_func,
        tmpdir)
def test_demo_pe(sample1_pe_fq, tmpdir):
    """Round-trip the demo wrapper with paired-end input.

    In contrast to the single-end tiny fixture, `sample1_pe_fq` is a tuple
    of two path names (see conftest.sample1_pe_fq()). The snakefile mirrors
    what the wrapper expects for PE data (see wrappers/demo/README.md).
    """
    snakefile = '''
    rule demo:
        input:
            R1='a1.fastq.gz',
            R2='a2.fastq.gz'
        output:
            R1='b1.fastq.gz',
            R2='b2.fastq.gz'
        wrapper: "file:wrapper"
    '''
    # Both mates must land in the filenames the snakefile expects.
    input_data_func = symlink_in_tempdir({
        sample1_pe_fq[0]: 'a1.fastq.gz',
        sample1_pe_fq[1]: 'a2.fastq.gz',
    })

    def check():
        # The demo wrapper copies input to output, so each output must be
        # byte-identical to its corresponding input.
        for src, dst in (('a1.fastq.gz', 'b1.fastq.gz'),
                         ('a2.fastq.gz', 'b2.fastq.gz')):
            with open(src, 'rb') as a, open(dst, 'rb') as b:
                assert a.read() == b.read()

    run(dpath('../wrappers/demo'), snakefile, check, input_data_func, tmpdir)
def test_cutadapt_se_with_list(sample1_se_tiny_fq, tmpdir):
    """Trim single-end reads with cutadapt and confirm the read count is
    preserved while the file contents change."""
    snakefile = '''
    rule cutadapt:
        input: 'sample1_R1.fastq.gz'
        output: 'sample1_R1.trim.fastq.gz'
        params: extra='-a AAA'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir(
        {sample1_se_tiny_fq: 'sample1_R1.fastq.gz'})

    def check():
        """Same number of lines in and out, but different file sizes
        (trimming changed read lengths, not read counts)."""
        n_in = sum(1 for _ in gzip.open('sample1_R1.fastq.gz'))
        n_out = sum(1 for _ in gzip.open('sample1_R1.trim.fastq.gz'))
        assert n_in == n_out == 4040
        assert (os.path.getsize('sample1_R1.fastq.gz')
                != os.path.getsize('sample1_R1.trim.fastq.gz'))

    run(dpath('../wrappers/cutadapt'), snakefile, check, input_data_func,
        tmpdir)
def test_featurecounts_pe(sample1_pe_tiny_bam, annotation, tmpdir):
    """Run featureCounts on a paired-end BAM with strict PE options and
    sanity-check the log, counts table, and summary."""
    snakefile = '''
    rule featurecounts:
        input:
            annotation='dm6.gtf',
            bam='sample1.bam'
        output:
            counts='sample1.counts',
        log: 'featurecounts.log'
        params: extra='-p -P -s 1 -B --splitOnly'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir({
        sample1_pe_tiny_bam: 'sample1.bam',
        annotation: 'dm6.gtf',
    })

    def check():
        with open('featurecounts.log') as fh:
            assert '//===================' in fh.read()
        with open('sample1.counts') as fh:
            assert '# Program:featureCounts' in fh.readline()
        with open('sample1.counts.summary') as fh:
            assert fh.readline().startswith('Status')
        # header lines + one row per gene in the tiny annotation
        assert sum(1 for _ in open('sample1.counts')) == 169
        # TODO: maybe assert that below a certain level are counted when all
        # those extra arguments are used?

    run(dpath('../wrappers/featurecounts'), snakefile, check, input_data_func,
        tmpdir)
def test_bowtie2_align_se_rm_unmapped(bowtie2_indexes, sample1_se_tiny_fq, tmpdir):
    """Align single-end reads with bowtie2 while filtering out unmapped
    reads via ``samtools_view_extra='-F 0x04'``, then verify the output BAM
    contains no unmapped records but does contain mapped ones."""
    d = _dict_of_bowtie2_indexes(bowtie2_indexes, 'dm6')
    indexes = list(d.values())
    snakefile = '''
    rule bowtie2_align:
        input:
            fastq='sample1_R1.fastq.gz',
            index={indexes}
        output:
            bam='sample1.bam'
        params: samtools_view_extra='-F 0x04'
        log: "bowtie2.log"
        wrapper: "file:wrapper"
    '''.format(indexes=indexes)
    # Add the fastq to the same mapping that already carries the index files.
    d[sample1_se_tiny_fq] = 'sample1_R1.fastq.gz'
    input_data_func = symlink_in_tempdir(d)

    def check():
        assert "overall alignment rate" in open('bowtie2.log').read()
        # Because '-F 0x04' strips unmapped reads, the BAM must contain
        # zero unmapped records (-f 0x04) and at least one mapped record.
        assert int(
            list(shell('samtools view -c -f 0x04 sample1.bam',
                       iterable=True))[0]) == 0
        assert int(
            list(shell('samtools view -c -F 0x04 sample1.bam',
                       iterable=True))[0]) > 0

    run(dpath('../wrappers/bowtie2/align'), snakefile, check, input_data_func,
        tmpdir)
def test_gB_cov_png(sample1_se_tiny_bam, sample1_se_tiny_bam_bai,
                    annotation_bed12, tmpdir):
    """Run rseqc geneBody_coverage with PNG output (``-f png``) and check
    that the PNG image is created.

    Fixes relative to the previous version: the snakefile had an invalid
    ``params: extra: = '-f png'`` line (stray colon/equals), and the test
    never invoked ``run(...)``, so it silently did nothing.
    """
    snakefile = '''
    rule geneBody_coverage:
        input:
            bam='sample1_R1.sort.bam',
            bai='sample1_R1.sort.bam.bai',
            bed='dm6.bed12'
        output:
            txt='sample1_R1.geneBodyCoverage.txt',
            r='sample1_R1.geneBodyCoverage.r',
            img='sample1_R1.geneBodyCoverage.png',
        params:
            extra='-f png'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1_R1.sort.bam',
        sample1_se_tiny_bam_bai['bai']: 'sample1_R1.sort.bam.bai',
        annotation_bed12: 'dm6.bed12'
    })

    def check():
        """Check that the PNG is created."""
        assert os.path.exists('sample1_R1.geneBodyCoverage.png')

    # use_conda=True matches the other rseqc wrapper tests in this file.
    run(dpath('../wrappers/rseqc/geneBody_coverage'), snakefile, check,
        input_data_func, tmpdir, use_conda=True)
def test_picard_collectrnaseqmetrics_se_plot(sample1_se_tiny_bam,
                                             annotation_refflat, tmpdir):
    """Run picard CollectRnaSeqMetrics with a CHART output and verify the
    metrics file is produced."""
    snakefile = '''
    rule collectrnaseqmetrics:
        input:
            bam='sample1.bam',
            refflat='dm6.refflat',
        output:
            metrics='sample1.metrics',
            plot='sample1.pdf'
        log: 'log'
        params:
            extra="STRAND=NONE CHART=sample1.pdf"
        wrapper: 'file:wrapper'
    '''
    mapping = {
        sample1_se_tiny_bam: 'sample1.bam',
        annotation_refflat: 'dm6.refflat',
    }
    input_data_func = symlink_in_tempdir(mapping)

    def check():
        with open('sample1.metrics') as fh:
            assert '## METRICS CLASS' in fh.read()

    run(dpath('../wrappers/picard/collectrnaseqmetrics'), snakefile, check,
        input_data_func, tmpdir, use_conda=True)
def sample1_se_bam_markdups(sample1_se_bam, tmpdir_factory):
    """Fixture: run picard MarkDuplicates on the single-end BAM and return
    a dict with the duplicate-marked BAM and its metrics file."""
    snakefile = '''
    rule markduplicates:
        input:
            bam='sample1.bam'
        output:
            bam='sample1.dupsmarked.bam',
            metrics='sample1.dupmetrics.txt'
        log: 'log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({sample1_se_bam: 'sample1.bam'})
    tmpdir = str(tmpdir_factory.mktemp('markduplicates_fixture'))
    # No check function: this is a fixture, downstream tests do the checking.
    run(dpath('../wrappers/picard/markduplicates'), snakefile, None,
        input_data_func, tmpdir, use_conda=True)
    return {
        key: os.path.join(tmpdir, fn)
        for key, fn in (('bam', 'sample1.dupsmarked.bam'),
                        ('metrics', 'sample1.dupmetrics.txt'))
    }
def generic_fixture(key, mapping, factory):
    """
    Tries to handle as much of the magic as possible.

    Parameters
    ----------
    key : str
        Key into the module-level config dict

    mapping : dict
        Maps paths from fixtures to input files expected by the snakefile

    factory : pytest tmpdir_factory
        Fixture factory handed to utils.tmpdir_for_func to create the
        temporary working directory

    Returns
    -------
    After a successful Snakemake run, returns the dictionary of the config's
    `output` key but with paths fixed to be relative to tmpdir. This returned
    dict is ready to be used as a fixture by test functions.
    """
    conf = config[key]
    tmpdir = utils.tmpdir_for_func(factory)
    input_data_func = utils.symlink_in_tempdir(mapping)
    # No check function: fixtures defer verification to the tests that use
    # their outputs.
    utils.run(utils.dpath(conf['wrapper']), conf['snakefile'], None,
              input_data_func, tmpdir)
    # Copy so repeated fixture calls never mutate the shared config dict.
    output = conf['output'].copy()
    for k, v in output.items():
        output[k] = os.path.join(tmpdir, v)
    return output
def test_picard_collectrnaseqmetrics_too_small_heap(sample1_se_tiny_bam, annotation_refflat, tmpdir):
    # set the java vm heap size to 128 bytes which should fail. This tests to
    # make sure the java args are making it through to the wrapper.
    #
    # NOTE(review): the comment above says the run "should fail", yet check()
    # asserts the metrics file was produced successfully — these appear to
    # contradict each other unless run() treats the failure specially.
    # Confirm the intended pass/fail semantics against utils.run.
    snakefile = '''
    rule collectrnaseqmetrics:
        input:
            bam='sample1.bam',
            refflat='dm6.refflat',
        output:
            metrics='sample1.metrics'
        log: 'log'
        params:
            extra="STRAND=NONE",
            java_args='-Xmx128'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1.bam',
        annotation_refflat: 'dm6.refflat',
    })

    def check():
        assert '## METRICS CLASS' in open('sample1.metrics').read()

    run(dpath('../wrappers/picard/collectrnaseqmetrics'), snakefile, check,
        input_data_func, tmpdir, use_conda=True)
def test_bam_stat(sample1_se_tiny_bam, tmpdir):
    """Run rseqc bam_stat on a single-end BAM and spot-check two known
    lines of the report."""
    snakefile = '''
    rule bam_stat:
        input:
            bam='sample1_R1.bam'
        output:
            txt='sample1_R1.bam_stat.txt'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir(
        {sample1_se_tiny_bam: 'sample1_R1.bam'})

    def check():
        """Spot-check a fixed line and the final line of the report."""
        with open('sample1_R1.bam_stat.txt', 'r') as handle:
            lines = handle.readlines()
        assert lines[5].split(':')[0] == 'Total records'
        assert lines[-1] == 'Proper-paired reads map to different chrom:0\n'

    run(dpath('../wrappers/rseqc/bam_stat'), snakefile, check,
        input_data_func, tmpdir, use_conda=True)
def test_fastq_screen(sample1_se_tiny_fq, bowtie2_indexes, tmpdir):
    """Run fastq_screen against the dm6 bowtie2 indexes and verify the
    subset size and genome label in the report."""
    snakefile = '''
    rule fastq_screen:
        input:
            fastq='sample1_R1.fastq.gz',
            dm6={indexes}
        output:
            txt='sample1_R1_screen.txt'
        params:
            subset=100000,
            aligner='bowtie2'
        wrapper: "file:wrapper"
    '''.format(indexes=bowtie2_indexes)
    input_data_func = symlink_in_tempdir(
        {sample1_se_tiny_fq: 'sample1_R1.fastq.gz'})

    def check():
        with open('sample1_R1_screen.txt') as fh:
            lines = fh.readlines()
        header = lines[0].strip().split()
        first_genome = lines[2].strip().split()
        # The header echoes the requested subset; the first genome row is
        # the dm6 index declared in the snakefile.
        assert header[-1] == '100000'
        assert first_genome[0] == 'dm6'

    run(dpath('../wrappers/fastq_screen'), snakefile, check, input_data_func,
        tmpdir)
def test_cutadapt_pe(sample1_pe_tiny_fq, tmpdir):
    """Trim paired-end reads with cutadapt; verify read counts, the log
    banner, and that trimming changed the R1 file size.

    Fix: the R2 output was previously named 'sample2_R1.trim.fastq.gz' — a
    typo (wrong sample number and mate) — now 'sample1_R2.trim.fastq.gz'
    to match the R2 input naming.
    """
    snakefile = '''
    rule cutadapt:
        input:
            R1='sample1_R1.fastq.gz',
            R2='sample1_R2.fastq.gz',
        output:
            R1='sample1_R1.trim.fastq.gz',
            R2='sample1_R2.trim.fastq.gz',
        params: extra='-a AAA'
        log: 'sample1.cutadapt.log'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir(
        {
            sample1_pe_tiny_fq[0]: 'sample1_R1.fastq.gz',
            sample1_pe_tiny_fq[1]: 'sample1_R2.fastq.gz',
        }
    )

    def check():
        """
        check for line lengths and that they are at least different sized
        """
        a = sum(1 for _ in gzip.open('sample1_R1.fastq.gz'))
        b = sum(1 for _ in gzip.open('sample1_R1.trim.fastq.gz'))
        assert a == b == 4040
        assert 'This is cutadapt' in open('sample1.cutadapt.log').readline()
        assert (os.path.getsize('sample1_R1.fastq.gz')
                != os.path.getsize('sample1_R1.trim.fastq.gz'))

    run(dpath('../wrappers/cutadapt'), snakefile, check, input_data_func,
        tmpdir)
def test_kallisto_quant(tmpdir, sample1_se_tiny_fq, kallisto_index):
    """Quantify single-end reads with kallisto and verify the abundance
    table shape/header and the run_info.json keys."""
    snakefile = '''
    rule kallisto_quant:
        input:
            fastq='sample1.fq.gz',
            index='out/transcriptome.idx'
        params: extra='--single --fragment-length=200 --sd=20'
        output:
            h5='quant/abundance.h5',
            tsv='quant/abundance.tsv',
            json='quant/run_info.json',
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_fq: 'sample1.fq.gz',
        kallisto_index: 'out/transcriptome.idx',
    })

    def check():
        # header + one row per transcript in the tiny transcriptome
        assert sum(1 for _ in open('quant/abundance.tsv')) == 310
        with open('quant/abundance.tsv') as fh:
            assert fh.readline() == (
                'target_id\tlength\teff_length\test_counts\ttpm\n')
        with open('quant/run_info.json') as fh:
            info = json.load(fh)
        for key in ('call', 'index_version', 'n_bootstraps', 'n_processed',
                    'n_targets', 'start_time'):
            assert key in info

    run(dpath('../wrappers/kallisto/quant'), snakefile, check,
        input_data_func, tmpdir)
def fastqc(sample1_se_tiny_fq, tmpdir_factory):
    """Fixture: run FastQC on the tiny single-end fastq and return the
    path to the resulting zip archive."""
    snakefile = '''
    rule fastqc:
        input:
            fastq='sample1_R1.fastq.gz'
        output:
            html='sample1_R1_fastqc.html',
            zip='sample1_R1_fastqc.zip'
        wrapper: "file:wrapper"'''
    input_data_func = symlink_in_tempdir(
        {sample1_se_tiny_fq: 'sample1_R1.fastq.gz'})
    tmpdir = str(tmpdir_factory.mktemp('fastqc_fixture'))
    # Fixture run: no check function; tests using the fixture verify output.
    run(dpath('../wrappers/fastqc'), snakefile, None, input_data_func, tmpdir)
    return os.path.join(tmpdir, 'sample1_R1_fastqc.zip')
def test_gB_cov(sample1_se_tiny_bam, sample1_se_tiny_bam_bai,
                annotation_bed12, tmpdir):
    """Run rseqc geneBody_coverage (default PDF output) and verify the R
    script, the text table, and the PDF are produced with the expected
    sample label."""
    snakefile = '''
    rule geneBody_coverage:
        input:
            bam='sample1_R1.sort.bam',
            bai='sample1_R1.sort.bam.bai',
            bed='dm6.bed12'
        output:
            txt='sample1_R1.geneBodyCoverage.txt',
            r='sample1_R1.geneBodyCoverage.r',
            img='sample1_R1.geneBodyCoverage.pdf',
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1_R1.sort.bam',
        sample1_se_tiny_bam_bai['bai']: 'sample1_R1.sort.bam.bai',
        annotation_bed12: 'dm6.bed12'
    })

    def check():
        """Verify all three outputs carry the expected sample label."""
        # R code: the first token of the first line is the sample label.
        with open('sample1_R1.geneBodyCoverage.r', 'r') as handle:
            assert handle.readline().split(' ')[0] == 'sample1_R1.sort'
        # text table: second line, first column is the same label.
        with open('sample1_R1.geneBodyCoverage.txt', 'r') as handle:
            assert handle.readlines()[1].split('\t')[0] == 'sample1_R1.sort'
        # PDF was written.
        assert os.path.exists('sample1_R1.geneBodyCoverage.pdf')

    run(dpath('../wrappers/rseqc/geneBody_coverage'), snakefile, check,
        input_data_func, tmpdir, use_conda=True)
def sample1_se_bam_bai(sample1_se_bam, tmpdir_factory):
    """Fixture: index the sorted single-end BAM with samtools and return
    both the bam and the bam.bai paths."""
    snakefile = '''
    rule index:
        input:
            bam='sample1.sorted.bam'
        output:
            bai='sample1.sorted.bam.bai'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir(
        {sample1_se_bam: 'sample1.sorted.bam'})
    tmpdir = str(tmpdir_factory.mktemp('sample1_se_bam_bai'))
    run(dpath('../wrappers/samtools/index'), snakefile, None,
        input_data_func, tmpdir)
    return {
        key: os.path.join(tmpdir, fn)
        for key, fn in (('bam', 'sample1.sorted.bam'),
                        ('bai', 'sample1.sorted.bam.bai'))
    }
def test_fastqc(sample1_se_tiny_fq, tmpdir):
    """Run FastQC and verify the HTML report exists and the zip archive
    contains only expected members."""
    snakefile = '''
    rule fastqc:
        input:
            fastq='sample1_R1.fastq.gz'
        output:
            html='results/sample1_R1.html',
            zip='sample1_R1.zip'
        wrapper: "file:wrapper"'''
    input_data_func = symlink_in_tempdir(
        {sample1_se_tiny_fq: 'sample1_R1.fastq.gz'})

    def check():
        with open('results/sample1_R1.html') as fh:
            assert '<html>' in fh.readline()
        # Every member of the zip must be one of the files FastQC is known
        # to produce (note: subset check, not exact equality).
        expected = {
            'sample1_R1_fastqc/',
            'sample1_R1_fastqc/Icons/',
            'sample1_R1_fastqc/Images/',
            'sample1_R1_fastqc/Icons/fastqc_icon.png',
            'sample1_R1_fastqc/Icons/warning.png',
            'sample1_R1_fastqc/Icons/error.png',
            'sample1_R1_fastqc/Icons/tick.png',
            'sample1_R1_fastqc/summary.txt',
            'sample1_R1_fastqc/Images/per_base_quality.png',
            'sample1_R1_fastqc/Images/per_tile_quality.png',
            'sample1_R1_fastqc/Images/per_sequence_quality.png',
            'sample1_R1_fastqc/Images/per_base_sequence_content.png',
            'sample1_R1_fastqc/Images/per_sequence_gc_content.png',
            'sample1_R1_fastqc/Images/per_base_n_content.png',
            'sample1_R1_fastqc/Images/sequence_length_distribution.png',
            'sample1_R1_fastqc/Images/duplication_levels.png',
            'sample1_R1_fastqc/Images/adapter_content.png',
            'sample1_R1_fastqc/fastqc_report.html',
            'sample1_R1_fastqc/fastqc_data.txt',
            'sample1_R1_fastqc/fastqc.fo',
        }
        for member in zipfile.ZipFile('sample1_R1.zip').namelist():
            assert member in expected

    run(dpath('../wrappers/fastqc'), snakefile, check, input_data_func,
        tmpdir)
def bowtie2_indexes(dm6_fa, tmpdir_factory):
    """Fixture: build bowtie2 indexes for dm6 and return the list of index
    file paths derived from the 'dm6' prefix."""
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule bowtie2:
        input:
            fasta='dm6.fa'
        output:
            index=['dm6.1.bt2', 'dm6.2.bt2']
        log: 'bowtie2.log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({dm6_fa: 'dm6.fa'})

    def check():
        # The final log line marks a completed build; the index should
        # contain exactly the two dm6 chromosomes in the fixture fasta.
        last_line = open('bowtie2.log').readlines()[-1]
        assert 'Total time for backward call to driver' in last_line
        names = list(shell('bowtie2-inspect dm6 -n', iterable=True))
        assert names == ['2L', '2R']

    run(dpath('../wrappers/bowtie2/build'), snakefile, check,
        input_data_func, d)
    return aligners.bowtie2_index_from_prefix(os.path.join(d, 'dm6'))
def test_multiqc(fastqc, tmpdir):
    """Aggregate the FastQC fixture output with MultiQC and verify an HTML
    report is written."""
    snakefile = '''
    rule multiqc:
        input: 'results/sample1_R1_fastqc.zip'
        output: 'multiqc.html'
        log: 'log'
        params: analysis_directory='results'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir(
        {fastqc: 'results/sample1_R1_fastqc.zip'})

    def check():
        with open('multiqc.html') as fh:
            assert '<!DOCTYPE html>' in fh.readline()

    run(dpath('../wrappers/multiqc'), snakefile, check, input_data_func,
        tmpdir)
def kallisto_index(tmpdir_factory, transcriptome):
    """Fixture: build a kallisto index from the transcriptome fasta and
    return the path to the .idx file.

    Fix: the check previously asserted a bare string literal
    (``assert '[build] target deBruijn graph'``), which is always true;
    it now actually tests that the message appears in the log.
    """
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule kallisto:
        input:
            fasta='transcriptome.fa'
        output:
            index='transcriptome.idx'
        log: 'log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({
        transcriptome: 'transcriptome.fa',
    })

    def check():
        log = open('log').read()
        # kallisto index logs this when the de Bruijn graph build ran.
        assert '[build] target deBruijn graph' in log

    run(dpath('../wrappers/kallisto/index'), snakefile, check,
        input_data_func, d)
    return os.path.join(d, 'transcriptome.idx')
def test_tin(sample1_se_tiny_bam, sample1_se_tiny_bam_bai, annotation_bed12,
             tmpdir):
    """Run rseqc tin.py and verify the header rows of the per-transcript
    table and the summary file."""
    snakefile = '''
    rule tin:
        input:
            bam='sample1_R1.sort.bam',
            bai='sample1_R1.sort.bam.bai',
            bed='dm6.bed12'
        output:
            table='sample1_R1.tin.tsv',
            summary='sample1_R1.tin.summary.txt'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1_R1.sort.bam',
        sample1_se_tiny_bam_bai['bai']: 'sample1_R1.sort.bam.bai',
        annotation_bed12: 'dm6.bed12'
    })

    def check():
        """Both outputs must start with their expected header columns."""
        with open('sample1_R1.tin.tsv', 'r') as handle:
            header = handle.readline().strip().split('\t')
        assert header == ['geneID', 'chrom', 'tx_start', 'tx_end', 'TIN']
        with open('sample1_R1.tin.summary.txt', 'r') as handle:
            header = handle.readline().strip().split('\t')
        assert header == ['Bam_file', 'TIN(mean)', 'TIN(median)',
                          'TIN(stdev)']

    run(dpath('../wrappers/rseqc/tin'), snakefile, check, input_data_func,
        tmpdir, use_conda=True)
def sample1_se_dupradar(sample1_se_bam_markdups, annotation, tmpdir_factory):
    """Fixture: run dupRadar on the duplicate-marked BAM and return a dict
    of the main plot/table outputs (model/curve files are produced but not
    returned)."""
    snakefile = '''
    rule dupradar:
        input:
            bam='sample1.bam',
            annotation='dm6.gtf'
        output:
            density_scatter='sample1.density_scatter.png',
            expression_histogram='sample1.expression_histogram.png',
            expression_barplot='sample1.expression_barplot.png',
            expression_boxplot='sample1.expression_boxplot.png',
            multimapping_histogram='sample1.multimapping_histogram.png',
            dataframe='sample1.dupradar.tsv',
            model='sample1.model.txt',
            curve='sample1.curve.txt'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_bam_markdups['bam']: 'sample1.bam',
        annotation: 'dm6.gtf',
    })
    tmpdir = str(tmpdir_factory.mktemp('dupradar_fixture'))
    run(dpath('../wrappers/dupradar'), snakefile, None, input_data_func,
        tmpdir, use_conda=False)
    filenames = dict(
        density_scatter='sample1.density_scatter.png',
        expression_histogram='sample1.expression_histogram.png',
        expression_barplot='sample1.expression_barplot.png',
        expression_boxplot='sample1.expression_boxplot.png',
        multimapping_histogram='sample1.multimapping_histogram.png',
        dataframe='sample1.dupradar.tsv',
    )
    return {key: os.path.join(tmpdir, fn) for key, fn in filenames.items()}
def test_hisat2_align_se_SRA(hisat2_indexes, tmpdir):
    """Align reads fetched directly from SRA (--sra-acc) with hisat2 and
    verify the BAM contains both mapped and unmapped records."""
    d = _dict_of_hisat2_indexes(hisat2_indexes, '2L')
    indexes = list(d.values())
    snakefile = '''
    rule hisat2_align:
        input:
            index={indexes}
        output:
            bam='sample1.bam'
        params: hisat2_extra='--sra-acc SRR1990338'
        log: "hisat2.log"
        wrapper: "file:wrapper"
    '''.format(indexes=indexes)
    input_data_func = symlink_in_tempdir(d)

    def count_reads(flag_arg):
        # Count BAM records matching a samtools view flag filter.
        cmd = 'samtools view -c {0} sample1.bam'.format(flag_arg)
        return int(list(shell(cmd, iterable=True))[0])

    def check():
        assert "overall alignment rate" in open('hisat2.log').read()
        # Aligning against 2L only: expect some unmapped and some mapped.
        assert count_reads('-f 0x04') > 0
        assert count_reads('-F 0x04') > 0

    run(dpath('../wrappers/hisat2/align'), snakefile, check, input_data_func,
        tmpdir)
def hisat2_indexes(dm6_fa, tmpdir_factory):
    """Fixture: build hisat2 indexes from the dm6 fasta and return the list
    of index file paths derived from the '2L' prefix."""
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule hisat2:
        input:
            fasta='2L.fa'
        output:
            index=['2L.1.ht2', '2L.2.ht2']
        log: 'hisat.log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({dm6_fa: '2L.fa'})

    def check():
        # The final log line marks a completed build; the fixture fasta
        # contains both 2L and 2R sequences.
        last_line = open('hisat.log').readlines()[-1]
        assert 'Total time for call to driver' in last_line
        names = list(shell('hisat2-inspect 2L -n', iterable=True))
        assert names == ['2L', '2R']

    run(dpath('../wrappers/hisat2/build'), snakefile, check,
        input_data_func, d)
    return aligners.hisat2_index_from_prefix(os.path.join(d, '2L'))
def salmon_index(tmpdir_factory, transcriptome):
    """Fixture: build a salmon index from the transcriptome fasta and
    return the path to the salmon_index directory."""
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule salmon:
        input:
            fasta='transcriptome.fa'
        output:
            hash='salmon_index/hash.bin'
        log: 'log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir(
        {transcriptome: 'transcriptome.fa'})

    def check():
        # salmon logs this message when indexing finished successfully.
        with open('log') as fh:
            assert '[info] done building index' in fh.read()

    run(dpath('../wrappers/salmon/index'), snakefile, check,
        input_data_func, d)
    return os.path.join(d, 'salmon_index')
def test_deeptools_bamCoverage_full_bam(sample1_se_bam, sample1_se_bam_bai,
                                        tmpdir):
    """Run deeptools bamCoverage on the full (non-tiny) BAM and check the
    bigWig against known values for this fixture.

    Fixes: this test previously shared the name `test_deeptools_bamCoverage`
    with a later test in this file, so pytest only collected the later
    definition and this one never ran; renamed to restore collection. The
    exact float equality on `bw.stats` was also replaced with a small
    tolerance to avoid spurious failures from floating-point summation
    order.
    """
    snakefile = '''
    rule deeptools:
        input:
            bam='sample1.bam',
            bai='sample1.bam.bai'
        output: 'sample1.bw',
        log: 'deeptools.log'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_bam: 'sample1.bam',
        sample1_se_bam_bai['bai']: 'sample1.bam.bai',
    })

    def check():
        bw = pyBigWig.open('sample1.bw')
        # Known values for this specific fixture BAM.
        assert bw.header()['sumData'] == 195295397
        assert abs(bw.stats('2L')[0] - 8.242775364434165) < 1e-6

    run(dpath('../wrappers/deeptools/bamCoverage'), snakefile, check,
        input_data_func, tmpdir)
def test_deeptools_bamCoverage(sample1_se_tiny_bam, sample1_se_tiny_bam_bai,
                               tmpdir):
    """Run deeptools bamCoverage on the tiny BAM and make input-independent
    checks on the resulting bigWig (header keys, format version, stats
    type)."""
    snakefile = '''
    rule deeptools:
        input:
            bam='sample1.bam',
            bai='sample1.bam.bai'
        output: 'sample1.bw',
        log: 'deeptools.log'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1.bam',
        sample1_se_tiny_bam_bai['bai']: 'sample1.bam.bai',
    })

    def check():
        bw = pyBigWig.open('sample1.bw')
        header = bw.header()
        expected_keys = ('maxVal', 'minVal', 'nBasesCovered', 'nLevels',
                         'sumData', 'sumSquared', 'version')
        for key in expected_keys:
            assert key in list(header.keys())
        # bigWig version should be independent of BAM input, so we can check
        # the value
        assert header['version'] == 4
        first_chrom = list(bw.chroms().keys())[0]
        assert isinstance(bw.stats(first_chrom)[0], float)

    run(dpath('../wrappers/deeptools/bamCoverage'), snakefile, check,
        input_data_func, tmpdir)
def test_featurecounts_se(sample1_se_tiny_bam, annotation, tmpdir):
    """Run featureCounts with default params on a single-end BAM and
    sanity-check the log, counts table, and summary."""
    snakefile = '''
    rule featurecounts:
        input:
            annotation='dm6.gtf',
            bam='sample1.bam'
        output:
            counts='sample1.counts',
        log: 'featurecounts.log'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1.bam',
        annotation: 'dm6.gtf',
    })

    def check():
        with open('featurecounts.log') as fh:
            assert '//===================' in fh.read()
        with open('sample1.counts') as fh:
            assert '# Program:featureCounts' in fh.readline()
        with open('sample1.counts.summary') as fh:
            assert fh.readline().startswith('Status')
        # header lines + one row per gene in the tiny annotation
        assert sum(1 for _ in open('sample1.counts')) == 169

    run(dpath('../wrappers/featurecounts'), snakefile, check,
        input_data_func, tmpdir)
def test_infer_experiment(sample1_se_tiny_bam, annotation_bed12, tmpdir):
    """Run rseqc infer_experiment.py and verify the report contains the
    expected single-end strandedness lines."""
    snakefile = '''
    rule infer_experiment:
        input:
            bam='sample1_R1.bam',
            bed='dm6.bed12'
        output:
            txt = 'sample1_R1.infer_experiment.txt'
        wrapper: "file:wrapper"
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1_R1.bam',
        annotation_bed12: 'dm6.bed12'
    })

    def check():
        """Every expected report line must appear in the output (the
        numeric fractions after each ':' are not checked)."""
        expected = dedent("""\
            This is SingleEnd Data
            Fraction of reads failed to determine:
            Fraction of reads explained by "++,--":
            Fraction of reads explained by "+-,-+":""").splitlines(False)
        with open('sample1_R1.infer_experiment.txt', 'r') as handle:
            report = handle.read().strip()
        for line in expected:
            assert line in report

    run(dpath('../wrappers/rseqc/infer_experiment'), snakefile, check,
        input_data_func, tmpdir, use_conda=True)