コード例 #1
0
ファイル: test_salmon.py プロジェクト: reneechou123/lcdb-wf
def test_salmon_quant_single_index(tmpdir, sample1_se_tiny_fq, salmon_index):
    """
    Hand the salmon/quant wrapper a single-end rule via `run` and verify the
    header line of the resulting `quant.sf`.
    """
    snakefile = '''
    rule salmon_quant:
        input:
             unmatedReads='sample1.fq.gz',
             index='idx/hash.bin'
        output: 'sample1/salmon/quant.sf'
        params: extra='--libType A'
        log: 'salmon.quant.log'
        wrapper: 'file:wrapper'
    '''
    # The index fixture is mapped to `idx`, matching the `idx/hash.bin` input
    # named in the rule above.
    input_data_func = symlink_in_tempdir(
        {
            sample1_se_tiny_fq: 'sample1.fq.gz',
            salmon_index: 'idx',
        }
    )

    def check():
        # Use a context manager so the handle is closed deterministically
        # rather than whenever the garbage collector gets to it.
        with open('sample1/salmon/quant.sf') as handle:
            header = handle.readline()
        assert header == 'Name\tLength\tEffectiveLength\tTPM\tNumReads\n'

    run(
        dpath('../wrappers/salmon/quant'),
        snakefile, check, input_data_func, tmpdir)
コード例 #2
0
def test_demo_pe(sample1_pe_fq, tmpdir):
    """
    Hand the demo wrapper a paired-end rule via `run` and verify that each
    output file holds exactly the same bytes as its corresponding input.
    """

    # In contrast to the sample1_se_tiny_fq fixture used in the previous function,
    # here the paired-end fixture `sample1_pe_fq` is a tuple of path names (see
    # conftest.sample1_pe_fq())

    # The snakefile reflects what the wrapper expects for PE (see
    # wrappers/demo/README.md).
    snakefile = '''
    rule demo:
        input:
            R1='a1.fastq.gz',
            R2='a2.fastq.gz'
        output:
            R1='b1.fastq.gz',
            R2='b2.fastq.gz'
        wrapper: "file:wrapper"
    '''

    # Map fixture to input files. Again, since this is paired-end we need to
    # make sure both files are provided the right filename for testing.
    input_data_func = symlink_in_tempdir({
        sample1_pe_fq[0]: 'a1.fastq.gz',
        sample1_pe_fq[1]: 'a2.fastq.gz',
    })

    def check():
        # Compare raw bytes for each input/output pair; the context manager
        # closes both handles as soon as they have been read.
        pairs = (
            ('a1.fastq.gz', 'b1.fastq.gz'),
            ('a2.fastq.gz', 'b2.fastq.gz'),
        )
        for src, dest in pairs:
            with open(src, 'rb') as fin, open(dest, 'rb') as fout:
                assert fin.read() == fout.read()

    run(dpath('../wrappers/demo'), snakefile, check, input_data_func, tmpdir)
コード例 #3
0
def test_cutadapt_se_with_list(sample1_se_tiny_fq, tmpdir):
    """
    Hand the cutadapt wrapper a single-end rule via `run` and compare the
    trimmed output against the input.
    """
    snakefile = '''
                rule cutadapt:
                    input: 'sample1_R1.fastq.gz'
                    output: 'sample1_R1.trim.fastq.gz'
                    params: extra='-a AAA'
                    wrapper: "file:wrapper"
                '''
    input_data_func = symlink_in_tempdir(
        {
            sample1_se_tiny_fq: 'sample1_R1.fastq.gz'
        }
    )

    def check():
        """
        Check that input and output both have 4040 lines but differ in
        (compressed) file size.
        """
        def count_lines(path):
            # Close the gzip handle as soon as the lines have been counted.
            with gzip.open(path) as handle:
                return sum(1 for _ in handle)

        a = count_lines('sample1_R1.fastq.gz')
        b = count_lines('sample1_R1.trim.fastq.gz')
        assert a == b == 4040

        assert os.path.getsize('sample1_R1.fastq.gz') != os.path.getsize('sample1_R1.trim.fastq.gz')

    run(dpath('../wrappers/cutadapt'), snakefile, check, input_data_func, tmpdir)
コード例 #4
0
def test_featurecounts_pe(sample1_pe_tiny_bam, annotation, tmpdir):
    """
    Hand the featurecounts wrapper a paired-end rule (with extra arguments)
    via `run` and verify the log, counts table and summary file.
    """
    snakefile = '''
                rule featurecounts:
                    input:
                        annotation='dm6.gtf',
                        bam='sample1.bam'
                    output:
                        counts='sample1.counts',
                    log: 'featurecounts.log'
                    params: extra='-p -P -s 1 -B --splitOnly'
                    wrapper: "file:wrapper"
                '''
    input_data_func = symlink_in_tempdir({
        sample1_pe_tiny_bam: 'sample1.bam',
        annotation: 'dm6.gtf',
    })

    def check():
        # Every file is read inside a context manager so no handle is leaked.
        with open('featurecounts.log') as handle:
            assert '//===================' in handle.read()

        # Read the counts table once; it is used for both the header check
        # and the line-count check below.
        with open('sample1.counts') as handle:
            count_lines = handle.readlines()
        assert count_lines and '# Program:featureCounts' in count_lines[0]

        with open('sample1.counts.summary') as handle:
            assert handle.readline().startswith('Status')

        assert len(count_lines) == 169

        # TODO: maybe assert that below a certain level are counted when all
        # those extra arguments are used?

    run(dpath('../wrappers/featurecounts'), snakefile, check, input_data_func,
        tmpdir)
コード例 #5
0
def test_bowtie2_align_se_rm_unmapped(bowtie2_indexes, sample1_se_tiny_fq,
                                      tmpdir):
    """
    Hand the bowtie2/align wrapper a single-end rule whose
    `samtools_view_extra` is '-F 0x04' and verify that the resulting BAM
    contains mapped reads but no unmapped ones.
    """
    d = _dict_of_bowtie2_indexes(bowtie2_indexes, 'dm6')
    indexes = list(d.values())
    snakefile = '''
        rule bowtie2_align:
            input:
                fastq='sample1_R1.fastq.gz',
                index={indexes}
            output:
                bam='sample1.bam'
            params:
                samtools_view_extra='-F 0x04'
            log: "bowtie2.log"
            wrapper: "file:wrapper"
    '''.format(indexes=indexes)
    # Reuse the index mapping and add the FASTQ so one dict describes every
    # input handed to symlink_in_tempdir.
    d[sample1_se_tiny_fq] = 'sample1_R1.fastq.gz'
    input_data_func = symlink_in_tempdir(d)

    def check():
        with open('bowtie2.log') as handle:
            assert "overall alignment rate" in handle.read()

        # Unmapped reads (flag 0x04) should have been removed entirely ...
        assert int(
            list(shell('samtools view -c -f 0x04 sample1.bam',
                       iterable=True))[0]) == 0
        # ... while at least some mapped reads must remain.
        assert int(
            list(shell('samtools view -c -F 0x04 sample1.bam',
                       iterable=True))[0]) > 0

    run(dpath('../wrappers/bowtie2/align'), snakefile, check, input_data_func,
        tmpdir)
コード例 #6
0
def test_gB_cov_png(sample1_se_tiny_bam, sample1_se_tiny_bam_bai,
                    annotation_bed12, tmpdir):
    """
    Hand the rseqc/geneBody_coverage wrapper a rule requesting PNG output
    (`extra='-f png'`) and verify that the PNG file is created.
    """
    # `extra` is a keyword item of `params`, so it is written `extra='...'`;
    # the previous `extra: = '-f png'` was not valid rule syntax.
    snakefile = '''
                rule geneBody_coverage:
                    input:
                        bam='sample1_R1.sort.bam',
                        bai='sample1_R1.sort.bam.bai',
                        bed='dm6.bed12'
                    output:
                        txt='sample1_R1.geneBodyCoverage.txt',
                        r='sample1_R1.geneBodyCoverage.r',
                        img='sample1_R1.geneBodyCoverage.png',
                    params:
                        extra='-f png'
                    wrapper: "file:wrapper"
                '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1_R1.sort.bam',
        sample1_se_tiny_bam_bai['bai']: 'sample1_R1.sort.bam.bai',
        annotation_bed12: 'dm6.bed12'
    })

    def check():
        """ Check that the PNG is created """
        assert os.path.exists('sample1_R1.geneBodyCoverage.png')

    # Without this call neither the rule nor `check` is ever exercised and
    # the test passes vacuously.
    # NOTE(review): wrapper path and use_conda mirror the PDF variant of this
    # test -- confirm they are right for the PNG case.
    run(dpath('../wrappers/rseqc/geneBody_coverage'),
        snakefile,
        check,
        input_data_func,
        tmpdir,
        use_conda=True)
コード例 #7
0
ファイル: test_picard.py プロジェクト: reneechou123/lcdb-wf
def test_picard_collectrnaseqmetrics_se_plot(sample1_se_tiny_bam,
                                             annotation_refflat, tmpdir):
    """
    Hand the picard/collectrnaseqmetrics wrapper a rule that also requests a
    chart (`CHART=sample1.pdf`) and verify the metrics file content.
    """
    snakefile = '''
    rule collectrnaseqmetrics:
        input:
            bam='sample1.bam',
            refflat='dm6.refflat',
        output:
            metrics='sample1.metrics',
            plot='sample1.pdf'
        log: 'log'
        params: extra="STRAND=NONE CHART=sample1.pdf"
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1.bam',
        annotation_refflat: 'dm6.refflat',
    })

    def check():
        # Context manager closes the metrics file once it has been read.
        with open('sample1.metrics') as handle:
            assert '## METRICS CLASS' in handle.read()

    run(dpath('../wrappers/picard/collectrnaseqmetrics'),
        snakefile,
        check,
        input_data_func,
        tmpdir,
        use_conda=True)
コード例 #8
0
ファイル: test_picard.py プロジェクト: reneechou123/lcdb-wf
def sample1_se_bam_markdups(sample1_se_bam, tmpdir_factory):
    """
    Hand the picard/markduplicates wrapper a rule for `sample1_se_bam` and
    return the paths of the two declared outputs.

    Returns
    -------
    dict with keys `bam` and `metrics`, each a path inside the temporary
    directory created from `tmpdir_factory`.
    """
    rule_text = '''
    rule markduplicates:
        input:
            bam='sample1.bam'
        output:
            bam='sample1.dupsmarked.bam',
            metrics='sample1.dupmetrics.txt'
        log: 'log'
        wrapper: 'file:wrapper'
    '''
    links = {sample1_se_bam: 'sample1.bam'}
    stage_inputs = symlink_in_tempdir(links)
    workdir = str(tmpdir_factory.mktemp('markduplicates_fixture'))

    # No check callback is needed here, hence the None.
    run(dpath('../wrappers/picard/markduplicates'), rule_text, None,
        stage_inputs, workdir, use_conda=True)

    produced = (
        ('bam', 'sample1.dupsmarked.bam'),
        ('metrics', 'sample1.dupmetrics.txt'),
    )
    return {key: os.path.join(workdir, fname) for key, fname in produced}
コード例 #9
0
ファイル: test_toy.py プロジェクト: reneechou123/lcdb-wf
def generic_fixture(key, mapping, factory):
    """
    Tries to handle as much of the magic as possible.

    Parameters
    ----------
    key : str
        Key into the module-level config dict. The selected entry must
        provide the `wrapper`, `snakefile` and `output` keys.

    mapping : dict
        Maps paths from fixtures to input files expected by the snakefile

    factory
        Passed to utils.tmpdir_for_func to obtain the temporary directory
        (presumably pytest's `tmpdir_factory` -- confirm against callers).

    Returns
    -------
    After a successful Snakemake run, returns the dictionary of the config's
    `output` key but with paths fixed to be relative to tmpdir. This returned
    dict is ready to be used as a fixture by test functions.
    """
    conf = config[key]
    tmpdir = utils.tmpdir_for_func(factory)
    input_data_func = utils.symlink_in_tempdir(mapping)
    # No check callback: this helper only needs the outputs to be produced.
    utils.run(utils.dpath(conf['wrapper']), conf['snakefile'], None,
              input_data_func, tmpdir)
    # Work on a copy so the module-level config is left untouched.
    output = conf['output'].copy()
    for name in output:
        output[name] = os.path.join(tmpdir, output[name])
    return output
コード例 #10
0
ファイル: test_picard.py プロジェクト: reneechou123/lcdb-wf
def test_picard_collectrnaseqmetrics_too_small_heap(sample1_se_tiny_bam,
                                                    annotation_refflat,
                                                    tmpdir):
    """
    Hand the picard/collectrnaseqmetrics wrapper a rule with
    `java_args='-Xmx128'` to exercise the pass-through of JVM arguments.
    """
    # set the java vm heap size to 128 bytes which should fail. This tests to
    # make sure the java args are making it through to the wrapper.
    # NOTE(review): `check` below asserts a successful metrics file, so for
    # the run to "fail" as intended this test presumably carries an xfail
    # marker (or similar) outside this view -- confirm.
    snakefile = '''
    rule collectrnaseqmetrics:
        input:
            bam='sample1.bam',
            refflat='dm6.refflat',
        output:
            metrics='sample1.metrics'
        log: 'log'
        params:
            extra="STRAND=NONE",
            java_args='-Xmx128'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1.bam',
        annotation_refflat: 'dm6.refflat',
    })

    def check():
        # Context manager closes the metrics file once it has been read.
        with open('sample1.metrics') as handle:
            assert '## METRICS CLASS' in handle.read()

    run(dpath('../wrappers/picard/collectrnaseqmetrics'),
        snakefile,
        check,
        input_data_func,
        tmpdir,
        use_conda=True)
コード例 #11
0
def test_bam_stat(sample1_se_tiny_bam, tmpdir):
    """
    Hand the rseqc/bam_stat wrapper a rule for a single BAM and verify two
    lines of the text report it writes.
    """
    rule_text = '''
                rule bam_stat:
                    input:
                        bam='sample1_R1.bam'
                    output: txt='sample1_R1.bam_stat.txt'
                    wrapper: "file:wrapper"
                '''
    links = {sample1_se_tiny_bam: 'sample1_R1.bam'}
    stage_inputs = symlink_in_tempdir(links)

    def verify():
        """
        Check the label on the sixth line and the exact text of the last
        line of the report.
        """
        with open('sample1_R1.bam_stat.txt', 'r') as report:
            lines = report.readlines()

        label = lines[5].split(':')[0]
        last_line = lines[-1]
        assert label == 'Total records'
        assert last_line == 'Proper-paired reads map to different chrom:0\n'

    run(dpath('../wrappers/rseqc/bam_stat'), rule_text, verify, stage_inputs,
        tmpdir, use_conda=True)
コード例 #12
0
def test_fastq_screen(sample1_se_tiny_fq, bowtie2_indexes, tmpdir):
    """
    Hand the fastq_screen wrapper a rule using the bowtie2 indexes as the
    `dm6` input and verify fields of the first and third report lines.
    """
    template = '''
    rule fastq_screen:
        input:
            fastq='sample1_R1.fastq.gz',
            dm6={indexes}
        output:
            txt='sample1_R1_screen.txt'
        params:
            subset=100000,
            aligner='bowtie2'
        wrapper:
            "file:wrapper"
    '''
    rule_text = template.format(indexes=bowtie2_indexes)

    links = {sample1_se_tiny_fq: 'sample1_R1.fastq.gz'}
    stage_inputs = symlink_in_tempdir(links)

    def verify():
        with open('sample1_R1_screen.txt') as report:
            lines = report.readlines()
            # Split both lines up front, then assert on the fields.
            first_fields = lines[0].strip().split()
            third_fields = lines[2].strip().split()
            assert first_fields[-1] == '100000'
            assert third_fields[0] == 'dm6'

    run(dpath('../wrappers/fastq_screen'), rule_text, verify, stage_inputs,
        tmpdir)
コード例 #13
0
def test_cutadapt_pe(sample1_pe_tiny_fq, tmpdir):
    """
    Hand the cutadapt wrapper a paired-end rule via `run` and compare the
    trimmed R1 output against the R1 input, plus the first log line.
    """
    # The R2 output is named after sample1/R2 (it was previously the
    # transposed `sample2_R1.trim.fastq.gz`), consistent with the inputs.
    snakefile = '''
                rule cutadapt:
                    input:
                        R1='sample1_R1.fastq.gz',
                        R2='sample1_R2.fastq.gz',
                    output:
                        R1='sample1_R1.trim.fastq.gz',
                        R2='sample1_R2.trim.fastq.gz',
                    params: extra='-a AAA'
                    log: 'sample1.cutadapt.log'
                    wrapper: "file:wrapper"
                '''
    input_data_func = symlink_in_tempdir(
        {
            sample1_pe_tiny_fq[0]: 'sample1_R1.fastq.gz',
            sample1_pe_tiny_fq[1]: 'sample1_R2.fastq.gz',
        }
    )

    def check():
        """
        Check that R1 input and output both have 4040 lines but differ in
        (compressed) file size, and that the log starts with the cutadapt
        banner.
        """
        def count_lines(path):
            # Close the gzip handle as soon as the lines have been counted.
            with gzip.open(path) as handle:
                return sum(1 for _ in handle)

        a = count_lines('sample1_R1.fastq.gz')
        b = count_lines('sample1_R1.trim.fastq.gz')
        assert a == b == 4040

        with open('sample1.cutadapt.log') as handle:
            assert 'This is cutadapt' in handle.readline()

        assert os.path.getsize('sample1_R1.fastq.gz') != os.path.getsize('sample1_R1.trim.fastq.gz')

    run(dpath('../wrappers/cutadapt'), snakefile, check, input_data_func, tmpdir)
コード例 #14
0
ファイル: test_kallisto.py プロジェクト: reneechou123/lcdb-wf
def test_kallisto_quant(tmpdir, sample1_se_tiny_fq, kallisto_index):
    """
    Hand the kallisto/quant wrapper a single-end rule via `run` and verify
    the abundance table and the run-info JSON.
    """
    snakefile = '''
    rule kallisto_quant:
        input:
             fastq='sample1.fq.gz',
             index='out/transcriptome.idx'

        params: extra='--single --fragment-length=200 --sd=20'
        output:
            h5='quant/abundance.h5',
            tsv='quant/abundance.tsv',
            json='quant/run_info.json',
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_fq:
        'sample1.fq.gz',
        kallisto_index:
        'out/transcriptome.idx',
    })

    def check():
        # Read the table once; both the row count and the header check use
        # it, and the context manager closes the handle.
        with open('quant/abundance.tsv') as handle:
            rows = handle.readlines()
        assert len(rows) == 310
        assert rows[0] == (
            'target_id\tlength\teff_length\test_counts\ttpm\n')

        keys = [
            'call', 'index_version', 'n_bootstraps', 'n_processed',
            'n_targets', 'start_time'
        ]
        with open('quant/run_info.json') as handle:
            d = json.load(handle)
        for k in keys:
            assert k in d

    run(dpath('../wrappers/kallisto/quant'), snakefile, check, input_data_func,
        tmpdir)
コード例 #15
0
def fastqc(sample1_se_tiny_fq, tmpdir_factory):
    """
    Hand the fastqc wrapper a rule for `sample1_se_tiny_fq` and return the
    path of the zip output inside a fresh temporary directory.
    """
    rule_text = '''
    rule fastqc:
        input:
            fastq='sample1_R1.fastq.gz'
        output:
            html='sample1_R1_fastqc.html',
            zip='sample1_R1_fastqc.zip'
        wrapper: "file:wrapper"'''
    links = {sample1_se_tiny_fq: 'sample1_R1.fastq.gz'}
    stage_inputs = symlink_in_tempdir(links)
    workdir = str(tmpdir_factory.mktemp('fastqc_fixture'))

    # No check callback is needed here, hence the None.
    run(dpath('../wrappers/fastqc'), rule_text, None, stage_inputs, workdir)

    zip_path = os.path.join(workdir, 'sample1_R1_fastqc.zip')
    return zip_path
コード例 #16
0
def test_gB_cov(sample1_se_tiny_bam, sample1_se_tiny_bam_bai, annotation_bed12,
                tmpdir):
    """
    Hand the rseqc/geneBody_coverage wrapper a rule with BAM, BAI and BED12
    inputs and verify its R script, text table and PDF outputs.
    """
    rule_text = '''
                rule geneBody_coverage:
                    input:
                        bam='sample1_R1.sort.bam',
                        bai='sample1_R1.sort.bam.bai',
                        bed='dm6.bed12'
                    output: txt='sample1_R1.geneBodyCoverage.txt',
                            r='sample1_R1.geneBodyCoverage.r',
                            img='sample1_R1.geneBodyCoverage.pdf',
                    wrapper: "file:wrapper"
                '''
    links = {
        sample1_se_tiny_bam: 'sample1_R1.sort.bam',
        sample1_se_tiny_bam_bai['bai']: 'sample1_R1.sort.bam.bai',
        annotation_bed12: 'dm6.bed12',
    }
    stage_inputs = symlink_in_tempdir(links)

    def verify():
        """
        Check the leading field of the R script's first line and of the text
        table's second line, and that the PDF exists.
        """
        expected_label = 'sample1_R1.sort'

        # R code: first space-delimited token of the first line
        with open('sample1_R1.geneBodyCoverage.r', 'r') as r_script:
            first_token = r_script.readline().split(' ')[0]
        assert first_token == expected_label

        # text: first tab-delimited field of the second line
        with open('sample1_R1.geneBodyCoverage.txt', 'r') as table:
            first_field = table.readlines()[1].split('\t')[0]
        assert first_field == expected_label

        # PDF
        assert os.path.exists('sample1_R1.geneBodyCoverage.pdf')

    run(dpath('../wrappers/rseqc/geneBody_coverage'), rule_text, verify,
        stage_inputs, tmpdir, use_conda=True)
コード例 #17
0
def sample1_se_bam_bai(sample1_se_bam, tmpdir_factory):
    """
    Hand the samtools/index wrapper a rule for `sample1_se_bam` and return a
    dict with the paths of both the bam (`bam`) and its index (`bai`).
    """
    rule_text = '''
    rule index:
        input: bam='sample1.sorted.bam'
        output: bai='sample1.sorted.bam.bai'
        wrapper: 'file:wrapper'
    '''
    links = {sample1_se_bam: 'sample1.sorted.bam'}
    stage_inputs = symlink_in_tempdir(links)
    workdir = str(tmpdir_factory.mktemp('sample1_se_bam_bai'))

    # No check callback is needed here, hence the None.
    run(dpath('../wrappers/samtools/index'), rule_text, None, stage_inputs,
        workdir)

    bam_path = os.path.join(workdir, 'sample1.sorted.bam')
    bai_path = os.path.join(workdir, 'sample1.sorted.bam.bai')
    return {'bam': bam_path, 'bai': bai_path}
コード例 #18
0
def test_fastqc(sample1_se_tiny_fq, tmpdir):
    """
    Hand the fastqc wrapper a rule with custom output names and verify the
    HTML report and the member names of the zip archive.
    """
    snakefile = '''
    rule fastqc:
        input:
            fastq='sample1_R1.fastq.gz'
        output:
            html='results/sample1_R1.html',
            zip='sample1_R1.zip'
        wrapper: "file:wrapper"'''
    input_data_func = symlink_in_tempdir(
        {
            sample1_se_tiny_fq: 'sample1_R1.fastq.gz'
        }
    )

    def check():
        with open('results/sample1_R1.html') as handle:
            assert '<html>' in handle.readline()

        # Every archive member must be one of these names; the check does
        # not require that all of them are present.
        contents = {
            'sample1_R1_fastqc/',
            'sample1_R1_fastqc/Icons/',
            'sample1_R1_fastqc/Images/',
            'sample1_R1_fastqc/Icons/fastqc_icon.png',
            'sample1_R1_fastqc/Icons/warning.png',
            'sample1_R1_fastqc/Icons/error.png',
            'sample1_R1_fastqc/Icons/tick.png',
            'sample1_R1_fastqc/summary.txt',
            'sample1_R1_fastqc/Images/per_base_quality.png',
            'sample1_R1_fastqc/Images/per_tile_quality.png',
            'sample1_R1_fastqc/Images/per_sequence_quality.png',
            'sample1_R1_fastqc/Images/per_base_sequence_content.png',
            'sample1_R1_fastqc/Images/per_sequence_gc_content.png',
            'sample1_R1_fastqc/Images/per_base_n_content.png',
            'sample1_R1_fastqc/Images/sequence_length_distribution.png',
            'sample1_R1_fastqc/Images/duplication_levels.png',
            'sample1_R1_fastqc/Images/adapter_content.png',
            'sample1_R1_fastqc/fastqc_report.html',
            'sample1_R1_fastqc/fastqc_data.txt',
            'sample1_R1_fastqc/fastqc.fo',
        }
        # ZipFile is a context manager, so the archive is closed on exit.
        with zipfile.ZipFile('sample1_R1.zip') as archive:
            for member in archive.namelist():
                assert member in contents

    run(dpath('../wrappers/fastqc'), snakefile, check, input_data_func, tmpdir)
コード例 #19
0
def bowtie2_indexes(dm6_fa, tmpdir_factory):
    """
    Hand the bowtie2/build wrapper a rule for `dm6_fa`, verify the log and
    the sequence names of the built index, and return the result of
    `aligners.bowtie2_index_from_prefix` for the `dm6` prefix.
    """
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule bowtie2:
        input: fasta='dm6.fa'
        output: index=['dm6.1.bt2', 'dm6.2.bt2']
        log: 'bowtie2.log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({dm6_fa: 'dm6.fa'})

    def check():
        # Read the log inside a context manager so the handle is closed.
        with open('bowtie2.log') as handle:
            log_lines = handle.readlines()
        assert 'Total time for backward call to driver' in log_lines[-1]
        assert list(shell('bowtie2-inspect dm6 -n',
                          iterable=True)) == ['2L', '2R']

    run(dpath('../wrappers/bowtie2/build'), snakefile, check, input_data_func,
        d)
    return aligners.bowtie2_index_from_prefix(os.path.join(d, 'dm6'))
コード例 #20
0
def test_multiqc(fastqc, tmpdir):
    """
    Hand the multiqc wrapper a rule that takes the `fastqc` fixture's zip as
    input and verify the first line of the generated HTML report.
    """
    snakefile = '''
    rule multiqc:
        input: 'results/sample1_R1_fastqc.zip'
        output: 'multiqc.html'
        log: 'log'
        params:
            analysis_directory='results'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({
        fastqc:
        'results/sample1_R1_fastqc.zip',
    })

    def check():
        # Context manager closes the report once its first line is read.
        with open('multiqc.html') as handle:
            assert '<!DOCTYPE html>' in handle.readline()

    run(dpath('../wrappers/multiqc'), snakefile, check, input_data_func,
        tmpdir)
コード例 #21
0
ファイル: test_kallisto.py プロジェクト: reneechou123/lcdb-wf
def kallisto_index(tmpdir_factory, transcriptome):
    """
    Hand the kallisto/index wrapper a rule for `transcriptome`, verify the
    log, and return the path of `transcriptome.idx` in the temp directory.
    """
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule kallisto:
        input: fasta='transcriptome.fa'
        output: index='transcriptome.idx'
        log: 'log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir({
        transcriptome: 'transcriptome.fa',
    })

    def check():
        with open('log') as handle:
            log = handle.read()
        # The marker must actually be searched for in the log; asserting the
        # bare (non-empty) string is always true and checks nothing.
        assert '[build] target deBruijn graph' in log

    run(dpath('../wrappers/kallisto/index'), snakefile, check, input_data_func,
        d)
    return os.path.join(d, 'transcriptome.idx')
コード例 #22
0
def test_tin(sample1_se_tiny_bam, sample1_se_tiny_bam_bai, annotation_bed12,
             tmpdir):
    """
    Hand the rseqc/tin wrapper a rule with BAM, BAI and BED12 inputs and
    verify the header rows of its table and summary outputs.
    """
    rule_text = '''
                rule tin:
                    input:
                        bam='sample1_R1.sort.bam',
                        bai='sample1_R1.sort.bam.bai',
                        bed='dm6.bed12'
                    output: table='sample1_R1.tin.tsv',
                            summary='sample1_R1.tin.summary.txt'
                    wrapper: "file:wrapper"
                '''
    links = {
        sample1_se_tiny_bam: 'sample1_R1.sort.bam',
        sample1_se_tiny_bam_bai['bai']: 'sample1_R1.sort.bam.bai',
        annotation_bed12: 'dm6.bed12',
    }
    stage_inputs = symlink_in_tempdir(links)

    def verify():
        """
        Check that the first line of each output splits (on tabs) into the
        expected column names.
        """
        # Per-transcript table header
        with open('sample1_R1.tin.tsv', 'r') as table:
            table_header = table.readline().strip().split('\t')
        assert table_header == ['geneID', 'chrom', 'tx_start', 'tx_end', 'TIN']

        # Summary header
        with open('sample1_R1.tin.summary.txt', 'r') as summary:
            summary_header = summary.readline().strip().split('\t')
        assert summary_header == [
            'Bam_file', 'TIN(mean)', 'TIN(median)', 'TIN(stdev)'
        ]

    run(dpath('../wrappers/rseqc/tin'), rule_text, verify, stage_inputs,
        tmpdir, use_conda=True)
コード例 #23
0
def sample1_se_dupradar(sample1_se_bam_markdups, annotation, tmpdir_factory):
    """
    Hand the dupradar wrapper a rule for the duplicate-marked BAM and return
    a dict mapping output names to paths inside a fresh temp directory.

    The returned dict covers the five plots and the dataframe; the rule's
    `model` and `curve` outputs are not included.
    """
    rule_text = '''
    rule dupradar:
        input:
            bam='sample1.bam',
            annotation='dm6.gtf'
        output:
            density_scatter='sample1.density_scatter.png',
            expression_histogram='sample1.expression_histogram.png',
            expression_barplot='sample1.expression_barplot.png',
            expression_boxplot='sample1.expression_boxplot.png',
            multimapping_histogram='sample1.multimapping_histogram.png',
            dataframe='sample1.dupradar.tsv',
            model='sample1.model.txt',
            curve='sample1.curve.txt'
        wrapper:
            'file:wrapper'
    '''
    links = {
        sample1_se_bam_markdups['bam']: 'sample1.bam',
        annotation: 'dm6.gtf',
    }
    stage_inputs = symlink_in_tempdir(links)
    workdir = str(tmpdir_factory.mktemp('dupradar_fixture'))

    # No check callback is needed here, hence the None.
    run(dpath('../wrappers/dupradar'), rule_text, None, stage_inputs, workdir,
        use_conda=False)

    exported = (
        ('density_scatter', 'sample1.density_scatter.png'),
        ('expression_histogram', 'sample1.expression_histogram.png'),
        ('expression_barplot', 'sample1.expression_barplot.png'),
        ('expression_boxplot', 'sample1.expression_boxplot.png'),
        ('multimapping_histogram', 'sample1.multimapping_histogram.png'),
        ('dataframe', 'sample1.dupradar.tsv'),
    )
    return {key: os.path.join(workdir, fname) for key, fname in exported}
コード例 #24
0
def test_hisat2_align_se_SRA(hisat2_indexes, tmpdir):
    """
    Hand the hisat2/align wrapper a rule that reads from an SRA accession
    (`--sra-acc SRR1990338`) and verify the log and the resulting BAM.
    """
    d = _dict_of_hisat2_indexes(hisat2_indexes, '2L')
    indexes = list(d.values())
    snakefile = '''
        rule hisat2_align:
            input:
                index={indexes}
            output:
                bam='sample1.bam'
            params: hisat2_extra='--sra-acc SRR1990338'
            log: "hisat2.log"
            wrapper: "file:wrapper"
    '''.format(indexes=indexes)
    input_data_func = symlink_in_tempdir(d)

    def check():
        # Context manager closes the log once it has been read.
        with open('hisat2.log') as handle:
            assert "overall alignment rate" in handle.read()

        # should have at least some mapped and unmapped
        assert int(list(shell('samtools view -c -f 0x04 sample1.bam', iterable=True))[0]) > 0
        assert int(list(shell('samtools view -c -F 0x04 sample1.bam', iterable=True))[0]) > 0

    run(dpath('../wrappers/hisat2/align'), snakefile, check, input_data_func, tmpdir)
コード例 #25
0
def hisat2_indexes(dm6_fa, tmpdir_factory):
    """
    Hand the hisat2/build wrapper a rule for `dm6_fa`, verify the log and
    the sequence names of the built index, and return the result of
    `aligners.hisat2_index_from_prefix` for the `2L` prefix.
    """
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule hisat2:
        input: fasta='2L.fa'
        output: index=['2L.1.ht2', '2L.2.ht2']
        log: 'hisat.log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir(
        {
            dm6_fa: '2L.fa'
        }
    )

    def check():
        # Read the log inside a context manager so the handle is closed.
        with open('hisat.log') as handle:
            log_lines = handle.readlines()
        assert 'Total time for call to driver' in log_lines[-1]
        assert list(shell('hisat2-inspect 2L -n', iterable=True)) == ['2L', '2R']

    run(
        dpath('../wrappers/hisat2/build'),
        snakefile, check, input_data_func, d)
    return aligners.hisat2_index_from_prefix(os.path.join(d, '2L'))
コード例 #26
0
ファイル: test_salmon.py プロジェクト: reneechou123/lcdb-wf
def salmon_index(tmpdir_factory, transcriptome):
    """
    Hand the salmon/index wrapper a rule for `transcriptome`, verify the
    log, and return the path of the `salmon_index` directory in the temp
    directory.
    """
    d = tmpdir_for_func(tmpdir_factory)
    snakefile = '''
    rule salmon:
        input: fasta='transcriptome.fa'
        output: hash='salmon_index/hash.bin'
        log: 'log'
        wrapper: 'file:wrapper'
    '''
    input_data_func = symlink_in_tempdir(
        {
            transcriptome: 'transcriptome.fa',
        }
    )

    def check():
        # Context manager closes the log once it has been read.
        with open('log') as handle:
            log = handle.read()
        assert '[info] done building index' in log

    run(
        dpath('../wrappers/salmon/index'),
        snakefile, check, input_data_func, d)
    # The rule writes `salmon_index/hash.bin`; callers get the directory.
    return os.path.join(d, 'salmon_index')
コード例 #27
0
def test_deeptools_bamCoverage(sample1_se_bam, sample1_se_bam_bai, tmpdir):
    """
    Hand the deeptools/bamCoverage wrapper a rule for the full-size BAM and
    verify known header and per-chromosome values of the bigWig output.
    """
    snakefile = '''
                rule deeptools:
                    input:
                        bam='sample1.bam',
                        bai='sample1.bam.bai'
                    output: 'sample1.bw',
                    log: 'deeptools.log'
                    wrapper: "file:wrapper"
                '''
    input_data_func = symlink_in_tempdir({
        sample1_se_bam:
        'sample1.bam',
        sample1_se_bam_bai['bai']:
        'sample1.bam.bai',
    })

    def check():
        import math

        bw = pyBigWig.open('sample1.bw')
        try:
            assert bw.header()['sumData'] == 195295397
            # Compare the float with a tight relative tolerance rather than
            # bit-for-bit, so last-digit rounding differences across
            # platforms or library versions do not fail the test.
            assert math.isclose(
                bw.stats('2L')[0], 8.242775364434165, rel_tol=1e-9)
        finally:
            # Always release the bigWig handle, even if an assertion fails.
            bw.close()

    run(dpath('../wrappers/deeptools/bamCoverage'), snakefile, check,
        input_data_func, tmpdir)
コード例 #28
0
def test_deeptools_bamCoverage(sample1_se_tiny_bam, sample1_se_tiny_bam_bai,
                               tmpdir):
    """
    Hand the deeptools/bamCoverage wrapper a rule for the tiny BAM and
    verify structural properties of the bigWig output (header keys, format
    version, and that per-chromosome stats are floats).
    """
    snakefile = '''
                rule deeptools:
                    input:
                        bam='sample1.bam',
                        bai='sample1.bam.bai'
                    output: 'sample1.bw',
                    log: 'deeptools.log'
                    wrapper: "file:wrapper"
                '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam:
        'sample1.bam',
        sample1_se_tiny_bam_bai['bai']:
        'sample1.bam.bai',
    })

    def check():
        bw = pyBigWig.open('sample1.bw')
        try:
            # Fetch the header once and reuse it for all header checks.
            header = bw.header()
            for k in [
                    'maxVal', 'minVal', 'nBasesCovered', 'nLevels', 'sumData',
                    'sumSquared', 'version'
            ]:
                assert k in header

            # bigWig version should be independent of BAM input, so we can
            # check the value
            assert header['version'] == 4

            first_chrom = list(bw.chroms().keys())[0]
            assert isinstance(bw.stats(first_chrom)[0], float)
        finally:
            # Always release the bigWig handle, even if an assertion fails.
            bw.close()

    run(dpath('../wrappers/deeptools/bamCoverage'), snakefile, check,
        input_data_func, tmpdir)
コード例 #29
0
def test_featurecounts_se(sample1_se_tiny_bam, annotation, tmpdir):
    """
    Hand the featurecounts wrapper a single-end rule via `run` and verify
    the log, counts table and summary file.
    """
    snakefile = '''
                rule featurecounts:
                    input:
                        annotation='dm6.gtf',
                        bam='sample1.bam'
                    output:
                        counts='sample1.counts',
                    log: 'featurecounts.log'
                    wrapper: "file:wrapper"
                '''
    input_data_func = symlink_in_tempdir({
        sample1_se_tiny_bam: 'sample1.bam',
        annotation: 'dm6.gtf',
    })

    def check():
        # Every file is read inside a context manager so no handle is leaked.
        with open('featurecounts.log') as handle:
            assert '//===================' in handle.read()

        # Read the counts table once; it is used for both the header check
        # and the line-count check below.
        with open('sample1.counts') as handle:
            count_lines = handle.readlines()
        assert count_lines and '# Program:featureCounts' in count_lines[0]

        with open('sample1.counts.summary') as handle:
            assert handle.readline().startswith('Status')

        assert len(count_lines) == 169

    run(dpath('../wrappers/featurecounts'), snakefile, check, input_data_func,
        tmpdir)
コード例 #30
0
def test_infer_experiment(sample1_se_tiny_bam, annotation_bed12, tmpdir):
    """
    Hand the rseqc/infer_experiment wrapper a rule with BAM and BED12 inputs
    and verify that the report contains the expected labels.
    """
    rule_text = '''
                rule infer_experiment:
                    input:
                        bam='sample1_R1.bam',
                        bed='dm6.bed12'
                    output:
                        txt = 'sample1_R1.infer_experiment.txt'
                    wrapper: "file:wrapper"
                '''
    links = {
        sample1_se_tiny_bam: 'sample1_R1.bam',
        annotation_bed12: 'dm6.bed12',
    }
    stage_inputs = symlink_in_tempdir(links)

    def verify():
        """
        Check that each expected label occurs somewhere in the report text.
        """
        # Only the labels are checked; the numeric fractions that follow
        # them in the report are not.
        expected_labels = (
            'This is SingleEnd Data',
            'Fraction of reads failed to determine:',
            'Fraction of reads explained by "++,--":',
            'Fraction of reads explained by "+-,-+":',
        )

        with open('sample1_R1.infer_experiment.txt', 'r') as report:
            text = report.read().strip()

        for label in expected_labels:
            assert label in text

    run(dpath('../wrappers/rseqc/infer_experiment'), rule_text, verify,
        stage_inputs, tmpdir, use_conda=True)