def test_application_fixtures_oneend():
    """Check picard 2.9.0 CollectInsertSizeMetrics per read layout.

    The command must be listed exactly once for end="pe" and not at all
    for end="se".
    """
    target = "picard_CollectInsertSizeMetrics"

    paired = config.application_fixtures(
        application="picard", version="2.9.0", end="pe")
    paired_hits = [entry for entry in paired if entry[1] == target]
    assert len(paired_hits) == 1

    single = config.application_fixtures(
        application="picard", version="2.9.0", end="se")
    # should not exist
    single_hits = [entry for entry in single if entry[1] == target]
    assert len(single_hits) == 0
def test_application_fixtures():
    """Check the output mapping of the picard 2.9.0 CollectRrbsMetrics fixture.

    The fifth element of the fixture tuple must be a dict with exactly the
    keys 'summary' and 'detail'.
    """
    candidates = config.application_fixtures(
        application="picard", version="2.9.0", end="pe")
    rrbs_entries = [
        entry for entry in candidates
        if entry[1] == "picard_CollectRrbsMetrics"
    ]
    module, command, version, end, fmtdict = rrbs_entries[0]

    assert isinstance(fmtdict, dict)
    # Iterating a dict yields its keys, so no explicit .keys() is needed.
    assert len(fmtdict) == 2
    assert sorted(fmtdict) == sorted(['summary', 'detail'])
# NOTE(review): the line below is a whitespace-collapsed excerpt of the "sga"
# test module. In order it holds: the copyright header, the imports, three
# module-level fixtures derived from application_fixtures(application="sga")
# (commands containing "preprocess", commands containing "filter", and an
# aggregation fixture over the "filter" commands created with 2 as its second
# argument), then test_sga_preprocess (expects "Reads parsed" to be 10000 for
# "se" and 20000 otherwise) and test_sga_filter (compares "Reads failed kmer
# check" against a per-version/per-end table).
# The excerpt stops at the bare header of test_sga_aggregate_filter; its body
# is not visible here, so the code is left untouched rather than rebuilt
# around a guessed body.
# Copyright (C) 2015 by Per Unneberg from bioodo import sga, odo, DataFrame from pytest_ngsfixtures.config import application_fixtures import utils fixtures = application_fixtures(application="sga") sga_preprocess_data = utils.fixture_factory( [x for x in fixtures if "preprocess" in x[1]]) sga_filter_data = utils.fixture_factory( [x for x in fixtures if "filter" in x[1]]) sga_aggregate_filter_data = utils.aggregation_fixture_factory( [x for x in fixtures if "filter" in x[1]], 2) def test_sga_preprocess(sga_preprocess_data): module, command, version, end, pdir = sga_preprocess_data df = odo(str(pdir.listdir()[0]), DataFrame) n = 10000 if end == "se" else 20000 assert df.loc["Reads parsed", "value"] == n def test_sga_filter(sga_filter_data): _filter_stats = {'0.10.13': {'se': 9400, 'pe': 16670}} module, command, version, end, pdir = sga_filter_data df = odo(str(pdir.listdir()[0]), DataFrame) assert (df.loc["Reads failed kmer check", "value"] == _filter_stats[version][end]) def test_sga_aggregate_filter(sga_aggregate_filter_data):
# Copyright (C) 2015 by Per Unneberg
from bioodo import star, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="star")
data = utils.fixture_factory(fixtures)
# For the aggregation fixture, narrow each fixture's output mapping down to
# its 'final' entry.
aggregate_data = utils.aggregation_fixture_factory(
    [
        (app, cmd, ver, layout, {'final': outputs['final']})
        for app, cmd, ver, layout, outputs in fixtures
    ],
    2)


def test_star_final_log(data):
    """Parse medium.Log.final.out and check the number of input reads."""
    module, command, version, end, pdir = data
    final_log = pdir.join("medium.Log.final.out")
    parsed = odo(str(final_log), DataFrame)
    assert parsed.loc["Number of input reads", "value"] == 30483


def test_star_aggregate(aggregate_data):
    """Aggregate the per-repeat logs and check the 'repeat' labels."""
    module, command, version, end, pdir = aggregate_data
    # First file of every sub-directory of the aggregation directory.
    inputs = [
        str(subdir.listdir()[0]) for subdir in pdir.listdir()
        if subdir.isdir()
    ]
    df = star.aggregate(
        inputs,
        regex=".*/(?P<repeat>[0-9]+)/medium.Log.final.out")
    assert sorted(df["repeat"].unique()) == ['0', '1']
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="bwa")
# Each factory receives its own copy of the fixture list.
bwa_data = utils.fixture_factory(list(fixtures))
bwa_aggregate_data = utils.aggregation_fixture_factory(list(fixtures), 2)


def test_bwa(bwa_data):
    """Placeholder: requests the bwa_data fixture but asserts nothing."""
    pass
# NOTE(review): the line below is a whitespace-collapsed excerpt of the
# "bcftools" test module. In order it holds: the copyright header, the
# imports, the fixtures for the "bcftools_stats" command (a plain fixture and
# an aggregation fixture created with 2 as its second argument), then
# test_basic_statistics (first index label must be 'number of samples';
# "number of records" is 10667 for "pe" and 7400 otherwise) and test_TSTV
# (reads the "TSTV" section and expects ts/tv 2.12 for "pe", 2.19 otherwise).
# The excerpt stops at the bare header of test_IDD; its body is not visible
# here, so the code is left untouched rather than rebuilt around a guessed
# body.
# Copyright (C) 2015 by Per Unneberg from bioodo import bcftools, odo, DataFrame from pytest_ngsfixtures.config import application_fixtures import utils fixtures = application_fixtures(application="bcftools") stat_fixtures = [x for x in fixtures if x[1] == "bcftools_stats"] bcftools_stats = utils.fixture_factory(stat_fixtures) bcftools_aggregate_QUAL_data = utils.aggregation_fixture_factory( [x for x in stat_fixtures], 2) def test_basic_statistics(bcftools_stats): module, command, version, end, pdir = bcftools_stats fn = str(pdir.join("medium.call.stats")) df = odo(fn, DataFrame) assert (list(df.index)[0] == 'number of samples') n = 10667 if end == "pe" else 7400 assert (df.loc["number of records", "value"] == n) def test_TSTV(bcftools_stats): module, command, version, end, pdir = bcftools_stats fn = str(pdir.join("medium.call.stats")) df = odo(fn, DataFrame, key="TSTV") tstv = 2.12 if end == "pe" else 2.19 assert (df.loc[0]["ts/tv"] == tstv) def test_IDD(bcftools_stats):
# Copyright (C) 2015 by Per Unneberg
from bioodo import qualimap, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="qualimap")
# Separate pe and se cases
outputs = {'pe': {}, 'se': {}}
# Split every fixture into one fixture per output key, so that each entry of
# newfixtures carries a single-item output mapping.
newfixtures = []
for x in fixtures:
    for k, v in x[4].items():
        y = (x[0], x[1], x[2], x[3], {k: v})
        newfixtures.append(y)
data = utils.fixture_factory(newfixtures, scope="function", unique=True)
# Only the "homopolymer_indels" outputs feed the aggregation fixture.
qualimap_aggregate_data = utils.aggregation_fixture_factory(
    [entry for entry in newfixtures if "homopolymer_indels" in entry[4]],
    2)


def test_qualimap(data):
    """Parse the first file of the fixture directory, depending on command."""
    module, command, version, end, pdir = data
    if command.startswith("qualimap_bamqc_genome_results"):
        fn = pdir.listdir()[0]
        df = odo(str(fn), DataFrame, key='Coverage_per_contig')
        expected_columns = ['chrlen', 'mapped_bases', 'mean_coverage', 'sd']
        assert list(df.columns) == expected_columns
        assert list(df.index)[0] == 'scaffold1'
    else:
        fn = pdir.listdir()[0]
        # NOTE(review): the remainder of this branch is not visible in this
        # excerpt.
# Copyright (C) 2015 by Per Unneberg
from bioodo import picard, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="picard")
insert_metrics = utils.fixture_factory(
    [x for x in fixtures if "CollectInsertSizeMetrics" in x[1]])
align_metrics = utils.fixture_factory(
    [x for x in fixtures if "CollectAlignmentSummaryMetrics" in x[1]])
aggregate_data_insert = utils.aggregation_fixture_factory(
    [x for x in fixtures if "CollectInsertSizeMetrics" in x[1]], 2)


def test_hist_metrics(insert_metrics):
    """Parse the insert size metrics file, both metrics and histogram.

    The default parse yields the metrics table; key="hist" selects the
    histogram section of the same file.
    """
    module, command, version, end, pdir = insert_metrics
    fn = pdir.join("medium.insert_size_metrics")
    metrics = odo(str(fn), DataFrame)
    hist = odo(str(fn), DataFrame, key="hist")
    assert all(metrics["MEDIAN_INSERT_SIZE"] == [367])
    assert all(hist["insert_size"][0:3] == [19, 22, 23])


def test_metrics(align_metrics):
    """Check MEAN_READ_LENGTH of the alignment summary metrics.

    The "FIRST_OF_PAIR" row is used for paired-end fixtures and the
    "UNPAIRED" row otherwise.
    """
    module, command, version, end, pdir = align_metrics
    fn = pdir.join("medium.align_metrics")
    metrics = odo(str(fn), DataFrame)
    # The tolerance check must be two-sided: without abs() any value smaller
    # than the reference (even a wildly wrong one) would pass.
    if end == "pe":
        observed = metrics.loc["FIRST_OF_PAIR"]["MEAN_READ_LENGTH"]
        assert abs(observed - 92.29) < 0.01
    else:
        observed = metrics.loc["UNPAIRED"]["MEAN_READ_LENGTH"]
        assert abs(observed - 92.29975) < 0.001
# Copyright (C) 2015 by Per Unneberg
from bioodo import mapdamage, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="mapdamage2")
mapdamage_data = utils.fixture_factory(fixtures)
mapdamage_agg_data_misincorp = utils.aggregation_fixture_factory(
    list(fixtures), 2, keys=["misincorp"])


def _check_pos_indexed(fixture, filename):
    """Parse *filename* from the fixture directory; index must be 'pos'."""
    module, command, version, end, pdir = fixture
    table = odo(str(pdir.join(filename)), DataFrame)
    assert (table.index.name == "pos")


def test_mapdamage_runtime(mapdamage_data):
    """Runtime_log.txt only has to parse; nothing is asserted on it."""
    module, command, version, end, pdir = mapdamage_data
    runtime_log = pdir.join("Runtime_log.txt")
    odo(str(runtime_log), DataFrame)


def test_mapdamage_3pGtoA(mapdamage_data):
    """3pGtoA_freq.txt parses to a frame indexed by 'pos'."""
    _check_pos_indexed(mapdamage_data, "3pGtoA_freq.txt")


def test_mapdamage_5pCtoT(mapdamage_data):
    """5pCtoT_freq.txt parses to a frame indexed by 'pos'."""
    _check_pos_indexed(mapdamage_data, "5pCtoT_freq.txt")
# NOTE(review): the line below is a whitespace-collapsed excerpt of the
# "fastqc" test module. In order it holds: the copyright header, the imports,
# the fastqc fixtures (a plain fixture and an aggregation fixture created with
# 2 as its second argument), and test_basic_statistics, which parses
# medium_fastqc.zip and compares the index labels against one of two lists:
# when the minor version number is >= 11 the fifth label is
# 'Sequences flagged as poor quality', otherwise 'Filtered Sequences'. It also
# expects the "Filename" value to be "medium.bam".
# The excerpt stops at the bare header of test_summary; its body is not
# visible here, so the code is left untouched rather than rebuilt around a
# guessed body.
# Copyright (C) 2015 by Per Unneberg import pytest from bioodo import fastqc, odo, DataFrame from pytest_ngsfixtures.config import application_fixtures import utils fixtures = application_fixtures(application="fastqc") fastqc_data = utils.fixture_factory(fixtures) fastqc_aggregate_data = utils.aggregation_fixture_factory( [x for x in fixtures], 2) def test_basic_statistics(fastqc_data): module, command, version, end, pdir = fastqc_data fn = str(pdir.join("medium_fastqc.zip")) df = odo(fn, DataFrame) major, minor, patch = version.split(".") if int(minor) >= 11: assert(list(df.index) == ['Filename', 'File type', 'Encoding', 'Total Sequences', 'Sequences flagged as poor quality', 'Sequence length', '%GC']) else: assert(list(df.index) == ['Filename', 'File type', 'Encoding', 'Total Sequences', 'Filtered Sequences', 'Sequence length', '%GC']) assert(df.loc["Filename", "Value"] == "medium.bam") def test_summary(fastqc_data):
def test_application_fixture_params():
    """application_fixtures(application="samtools") must return a list."""
    samtools_fixtures = application_fixtures(application="samtools")
    assert isinstance(samtools_fixtures, list)
def test_fileset_fixture_dst(bamset2):
    """Check file names and the first resolved path under bamset2.

    Entries named ".lock" are ignored in both checks.
    """
    names = sorted(
        entry.basename for entry in bamset2.visit()
        if entry.basename != ".lock")
    assert names == sorted(dstfiles)

    resolved = sorted(
        entry.realpath() for entry in bamset2.visit()
        if entry.basename != ".lock")
    assert resolved[0] == bamfile_realpath


##############################
# Applications
##############################
# Application test config
fixtures = application_fixtures()


@pytest.fixture(
    scope="function",
    autouse=False,
    params=fixtures,
    ids=["{} {}:{}/{}".format(*fixt[:4]) for fixt in fixtures])
def ao(request, tmpdir_factory):
    # Each parameter is an (app, command, version, end, fmtdict) tuple.
    app, command, version, end, fmtdict = request.param
    params = {'version': version, 'end': end}
    # Fill version/end into every output template of the fixture.
    outputs = [fmt.format(**params) for fmt in fmtdict.values()]
    sources = [os.path.join("applications", app, output) for output in outputs]
    dests = [os.path.basename(src) for src in sources]
    fdir = os.path.join(app, str(version), command, end)
    pdir = safe_mktemp(tmpdir_factory, fdir)
    # NOTE(review): the remainder of this fixture is not visible in this
    # excerpt.
def test_all_application_fixtures_oneend():
    """The unfiltered fixture list has one picard 2.9.0 insert-size entry."""
    every_fixture = config.application_fixtures()
    # Make sure CollectInsertSizeMetrics lacks se case
    insert_size_entries = [
        entry for entry in every_fixture
        if entry[1] == "picard_CollectInsertSizeMetrics"
        and entry[2] == "2.9.0"
    ]
    assert len(insert_size_entries) == 1
# Copyright (C) 2015 by Per Unneberg
import os
from bioodo import rseqc, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

blacklist = ["rseqc_junction_annotation", "rseqc_read_duplication"]
fixtures = application_fixtures(application="rseqc")
# Drop every fixture whose command is blacklisted.
fixture_list = [f for f in fixtures if f[1] not in blacklist]
data = utils.fixture_factory(fixture_list, scope="function")
rseqc_aggregate_data = utils.aggregation_fixture_factory(
    list(fixture_list), 2)


def test_rseqc_parse(data):
    """Every fixture's first output file must parse into a DataFrame."""
    module, command, version, end, pdir = data
    first_output = pdir.listdir()[0]
    odo(str(first_output), DataFrame)
    # NOTE(review): "rseqc_read_duplication" is in `blacklist`, so this branch
    # looks unreachable for fixtures built from fixture_list -- confirm.
    if command == "rseqc_read_duplication":
        second_output = pdir.listdir()[1]
        odo(str(second_output), DataFrame)


def test_rseqc_aggregate(rseqc_aggregate_data):
    """Aggregate the first file of every repeat sub-directory."""
    module, command, version, end, pdir = rseqc_aggregate_data
    infiles = [
        str(subdir.listdir()[0]) for subdir in pdir.listdir()
        if subdir.isdir()
    ]
    # The regex captures the repeat directory name in front of the file name.
    pattern = ".*/(?P<repeat>[0-9]+)/" + os.path.basename(infiles[0])
    df = rseqc.aggregate(infiles, regex=pattern)
# Copyright (C) 2015 by Per Unneberg
from bioodo import cutadapt, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="cutadapt")
cutadapt_metrics = utils.fixture_factory(fixtures)
cutadapt_aggregate_data = utils.aggregation_fixture_factory(
    list(fixtures), 2)


def test_cutadapt(cutadapt_metrics):
    """Check the adapter read count in cutadapt_metrics.txt.

    The row label differs between single-end and paired-end output; any
    other layout is parsed but not asserted on.
    """
    module, command, version, end, pdir = cutadapt_metrics
    metrics_file = str(pdir.join("cutadapt_metrics.txt"))
    df = odo(metrics_file, DataFrame)
    adapter_rows = {
        "se": "Reads with adapters",
        "pe": "Read 1 with adapter",
    }
    row = adapter_rows.get(end)
    if row is not None:
        assert df.loc[row]["value"] == 792


def test_cutadapt_aggregate(cutadapt_aggregate_data):
    """Aggregate the per-repeat metrics and check the 'repeat' labels."""
    module, command, version, end, pdir = cutadapt_aggregate_data
    inputs = [
        str(subdir.listdir()[0]) for subdir in pdir.listdir()
        if subdir.isdir()
    ]
    df = cutadapt.aggregate(
        inputs,
        regex=".*/(?P<repeat>[0-9]+)/cutadapt_metrics.txt")
    assert sorted(df["repeat"].unique()) == ['0', '1']
# Copyright (C) 2015 by Per Unneberg
from bioodo import samtools, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="samtools")

# One fixture per samtools command under test.
stat_fixtures = [fixt for fixt in fixtures if fixt[1] == "samtools_stats"]
samtools_stats = utils.fixture_factory(stat_fixtures)

idxstats_fixtures = [
    fixt for fixt in fixtures if fixt[1] == "samtools_idxstats"
]
samtools_idxstats = utils.fixture_factory(idxstats_fixtures)


def test_basic_statistics(samtools_stats):
    # Table of integer values keyed by samtools version, then by read layout.
    _stats = {
        '1.2': {'se': 60037, 'pe': 120110},
        '1.3.1': {'se': 60000, 'pe': 120000},
        '1.4.1': {'se': 60000, 'pe': 120000},
    }
    module, command, version, end, pdir = samtools_stats
    fn = str(pdir.join("medium.stats.txt"))
    # NOTE(review): the remainder of this test is not visible in this excerpt.
# Copyright (C) 2015 by Per Unneberg
from bioodo import vsearch, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

blacklist = ['vsearch_fastqc_filter']
# Keep only the fixtures whose command is not blacklisted.
fixtures = [
    fixt for fixt in application_fixtures(application="vsearch")
    if fixt[1] not in blacklist
]
data = utils.fixture_factory(fixtures)
aggregate_data = utils.aggregation_fixture_factory(fixtures, 2)


def test_vsearch_fastq_stats(data):
    """Parse medium.fastq_stats.txt, default section and one keyed section."""
    module, command, version, end, pdir = data
    stats_file = str(pdir.join("medium.fastq_stats.txt"))

    # Section returned when no key is given.
    default_section = odo(stats_file, DataFrame)
    assert list(default_section.columns) == ["N", "Pct", "AccPct"]
    assert default_section.index.name == "L"

    truncate_section = odo(stats_file, DataFrame, key="Truncate at first Q")
    assert list(truncate_section.columns) == ["Q=5", "Q=10", "Q=15", "Q=20"]
    assert truncate_section.index.name == "Len"


def test_vsearch_aggregate(aggregate_data):
    """Aggregate the per-repeat stats and check the 'repeat' labels."""
    module, command, version, end, pdir = aggregate_data
    inputs = [
        str(subdir.listdir()[0]) for subdir in pdir.listdir()
        if subdir.isdir()
    ]
    df = vsearch.aggregate(
        inputs,
        regex=".*/(?P<repeat>[0-9]+)/medium.fastq_stats.txt")
    assert sorted(df["repeat"].unique()) == ['0', '1']