def test_application_fixtures_oneend():
    """Check picard CollectInsertSizeMetrics 2.9.0 exists for pe but not se.

    The paired-end query must contain exactly one matching entry and the
    single-end query none.
    """
    target = "picard_CollectInsertSizeMetrics"
    # (end, expected number of matching entries); the se case should not exist
    for layout, expected in (("pe", 1), ("se", 0)):
        entries = config.application_fixtures(
            application="picard", version="2.9.0", end=layout)
        hits = sum(1 for entry in entries if entry[1] == target)
        assert hits == expected
def test_application_fixtures():
    """Check the output mapping of the picard CollectRrbsMetrics fixture.

    The first matching paired-end entry for picard 2.9.0 must unpack into
    five fields, the last being a dict keyed by 'detail' and 'summary' only.
    """
    entries = config.application_fixtures(
        application="picard", version="2.9.0", end="pe")
    matches = [entry for entry in entries
               if entry[1] == "picard_CollectRrbsMetrics"]
    _module, _command, _version, _end, fmtdict = matches[0]
    assert isinstance(fmtdict, dict)
    assert len(fmtdict) == 2
    assert sorted(fmtdict) == sorted(['summary', 'detail'])
예제 #3
0
파일: test_sga.py 프로젝트: percyfal/bioodo
# Copyright (C) 2015 by Per Unneberg
from bioodo import sga, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="sga")


def _sga_fixtures_matching(token):
    """Return a fresh list of sga fixtures whose command name contains token."""
    return [entry for entry in fixtures if token in entry[1]]


# Every factory call receives its own newly built list.
sga_preprocess_data = utils.fixture_factory(
    _sga_fixtures_matching("preprocess"))
sga_filter_data = utils.fixture_factory(
    _sga_fixtures_matching("filter"))
sga_aggregate_filter_data = utils.aggregation_fixture_factory(
    _sga_fixtures_matching("filter"), 2)


def test_sga_preprocess(sga_preprocess_data):
    """Parse the first preprocess output and check the parsed read count.

    10000 reads are expected when end is "se", 20000 otherwise.
    """
    _module, _command, _version, end, pdir = sga_preprocess_data
    first_output = pdir.listdir()[0]
    frame = odo(str(first_output), DataFrame)
    if end == "se":
        expected_reads = 10000
    else:
        expected_reads = 20000
    assert frame.loc["Reads parsed", "value"] == expected_reads


def test_sga_filter(sga_filter_data):
    """Compare the k-mer check failure count with the recorded value.

    Expected counts are looked up by version and then by end.
    """
    # Expected "Reads failed kmer check" values: version -> end -> count.
    expected_by_version = {'0.10.13': {'se': 9400, 'pe': 16670}}
    _module, _command, version, end, pdir = sga_filter_data
    frame = odo(str(pdir.listdir()[0]), DataFrame)
    observed = frame.loc["Reads failed kmer check", "value"]
    assert observed == expected_by_version[version][end]


def test_sga_aggregate_filter(sga_aggregate_filter_data):
예제 #4
0
# Copyright (C) 2015 by Per Unneberg
from bioodo import star, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="star")
data = utils.fixture_factory(fixtures)


def _star_final_only(entry):
    """Return a copy of entry whose output dict keeps only the 'final' key."""
    return (entry[0], entry[1], entry[2], entry[3],
            {'final': entry[4]['final']})


# The aggregation fixture only needs the 'final' output of each entry.
aggregate_data = utils.aggregation_fixture_factory(
    [_star_final_only(entry) for entry in fixtures], 2)


def test_star_final_log(data):
    """Parse medium.Log.final.out and check the number of input reads."""
    _module, _command, _version, _end, pdir = data
    log_path = str(pdir.join("medium.Log.final.out"))
    frame = odo(log_path, DataFrame)
    assert frame.loc["Number of input reads", "value"] == 30483


def test_star_aggregate(aggregate_data):
    """Aggregate the first file of each subdirectory and check repeat labels.

    The ``repeat`` column of the aggregated frame must hold exactly the
    labels '0' and '1'.
    """
    _module, _command, _version, _end, pdir = aggregate_data
    repeat_files = []
    for entry in pdir.listdir():
        if entry.isdir():
            repeat_files.append(str(entry.listdir()[0]))
    frame = star.aggregate(
        repeat_files,
        regex=".*/(?P<repeat>[0-9]+)/medium.Log.final.out")
    assert sorted(frame["repeat"].unique()) == ['0', '1']
예제 #5
0
파일: test_bwa.py 프로젝트: percyfal/bioodo
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="bwa")
# Each factory gets its own shallow copy of the fixture list.
bwa_data = utils.fixture_factory(list(fixtures))
bwa_aggregate_data = utils.aggregation_fixture_factory(list(fixtures), 2)


def test_bwa(bwa_data):
    """Placeholder: requests the bwa fixture and asserts nothing."""
예제 #6
0
# Copyright (C) 2015 by Per Unneberg
from bioodo import bcftools, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="bcftools")

# Keep only the entries whose command name is exactly "bcftools_stats".
stat_fixtures = [entry for entry in fixtures if entry[1] == "bcftools_stats"]
bcftools_stats = utils.fixture_factory(stat_fixtures)
# The aggregation factory receives its own shallow copy of the list.
bcftools_aggregate_QUAL_data = utils.aggregation_fixture_factory(
    list(stat_fixtures), 2)


def test_basic_statistics(bcftools_stats):
    """Parse medium.call.stats; check the first index label and record count.

    10667 records are expected when end is "pe", 7400 otherwise.
    """
    _module, _command, _version, end, pdir = bcftools_stats
    stats_path = str(pdir.join("medium.call.stats"))
    frame = odo(stats_path, DataFrame)
    first_label = list(frame.index)[0]
    assert first_label == 'number of samples'
    if end == "pe":
        expected_records = 10667
    else:
        expected_records = 7400
    assert frame.loc["number of records", "value"] == expected_records


def test_TSTV(bcftools_stats):
    """Parse medium.call.stats with key="TSTV" and check the ts/tv value.

    2.12 is expected when end is "pe", 2.19 otherwise.
    """
    _module, _command, _version, end, pdir = bcftools_stats
    stats_path = str(pdir.join("medium.call.stats"))
    frame = odo(stats_path, DataFrame, key="TSTV")
    if end == "pe":
        expected_ratio = 2.12
    else:
        expected_ratio = 2.19
    first_row = frame.loc[0]
    assert first_row["ts/tv"] == expected_ratio


def test_IDD(bcftools_stats):
예제 #7
0
# Copyright (C) 2015 by Per Unneberg
from bioodo import qualimap, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils


fixtures = application_fixtures(application="qualimap")

# Separate pe and se cases
outputs = {'pe': {}, 'se': {}}
# Split every fixture into one entry per item of its output dict, so each
# new entry carries a dict with exactly one key.
newfixtures = [
    (entry[0], entry[1], entry[2], entry[3], {label: value})
    for entry in fixtures
    for label, value in entry[4].items()
]

data = utils.fixture_factory(newfixtures, scope="function", unique=True)
qualimap_aggregate_data = utils.aggregation_fixture_factory(
    [entry for entry in newfixtures if "homopolymer_indels" in entry[4]], 2)


def test_qualimap(data):
    """Parse one qualimap output file; the checks depend on the command name.

    ``data`` unpacks into (module, command, version, end, pdir).

    NOTE(review): the ``else`` branch appears to continue beyond what is
    visible here; only the visible statements are documented.
    """
    module, command, version, end, pdir = data
    if command.startswith("qualimap_bamqc_genome_results"):
        # Genome results: read the table stored under 'Coverage_per_contig'
        # from the first file listed in pdir.
        fn = pdir.listdir()[0]
        df = odo(str(fn), DataFrame, key='Coverage_per_contig')
        assert list(df.columns) == ['chrlen', 'mapped_bases',
                                    'mean_coverage', 'sd']
        assert list(df.index)[0] == 'scaffold1'
    else:
        # Any other command: start from the first file listed in pdir.
        fn = pdir.listdir()[0]
예제 #8
0
# Copyright (C) 2015 by Per Unneberg
from bioodo import picard, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="picard")


def _picard_fixtures_matching(token):
    """Return a fresh list of picard fixtures whose command contains token."""
    return [entry for entry in fixtures if token in entry[1]]


insert_metrics = utils.fixture_factory(
    _picard_fixtures_matching("CollectInsertSizeMetrics"))
align_metrics = utils.fixture_factory(
    _picard_fixtures_matching("CollectAlignmentSummaryMetrics"))
aggregate_data_insert = utils.aggregation_fixture_factory(
    _picard_fixtures_matching("CollectInsertSizeMetrics"), 2)


def test_hist_metrics(insert_metrics):
    """Parse medium.insert_size_metrics; check the median and histogram head.

    The file is read twice: once with default settings and once with
    key="hist".
    """
    _module, _command, _version, _end, pdir = insert_metrics
    metrics_path = str(pdir.join("medium.insert_size_metrics"))
    metrics = odo(metrics_path, DataFrame)
    hist = odo(metrics_path, DataFrame, key="hist")
    median_matches = metrics["MEDIAN_INSERT_SIZE"] == [367]
    assert all(median_matches)
    head_matches = hist["insert_size"][0:3] == [19, 22, 23]
    assert all(head_matches)


def test_metrics(align_metrics):
    """Check MEAN_READ_LENGTH parsed from medium.align_metrics.

    ``align_metrics`` unpacks into (module, command, version, end, pdir).
    For end == "pe" the FIRST_OF_PAIR row must be within 0.01 of 92.29;
    otherwise the UNPAIRED row must be within 0.001 of 92.29975.
    """
    module, command, version, end, pdir = align_metrics
    fn = pdir.join("medium.align_metrics")
    metrics = odo(str(fn), DataFrame)
    if end == "pe":
        category, expected, tolerance = "FIRST_OF_PAIR", 92.29, 0.01
    else:
        category, expected, tolerance = "UNPAIRED", 92.29975, 0.001
    observed = metrics.loc[category]["MEAN_READ_LENGTH"]
    # abs() makes the tolerance two-sided: the bare difference is negative,
    # and therefore "within tolerance", for any value far below expected.
    assert abs(observed - expected) < tolerance
예제 #9
0
# Copyright (C) 2015 by Per Unneberg
from bioodo import mapdamage, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="mapdamage2")
mapdamage_data = utils.fixture_factory(fixtures)
# The aggregation factory gets a shallow copy of the fixture list and is
# restricted to the "misincorp" key.
mapdamage_agg_data_misincorp = utils.aggregation_fixture_factory(
    list(fixtures), 2, keys=["misincorp"])


def test_mapdamage_runtime(mapdamage_data):
    """Smoke test: converting Runtime_log.txt via odo must not raise."""
    _module, _command, _version, _end, pdir = mapdamage_data
    runtime_log = str(pdir.join("Runtime_log.txt"))
    odo(runtime_log, DataFrame)


def test_mapdamage_3pGtoA(mapdamage_data):
    """Parse 3pGtoA_freq.txt and check that the index is named "pos"."""
    _module, _command, _version, _end, pdir = mapdamage_data
    freq_path = str(pdir.join("3pGtoA_freq.txt"))
    frame = odo(freq_path, DataFrame)
    index_name = frame.index.name
    assert index_name == "pos"


def test_mapdamage_5pCtoT(mapdamage_data):
    """Parse 5pCtoT_freq.txt and check that the index is named "pos"."""
    _module, _command, _version, _end, pdir = mapdamage_data
    freq_path = str(pdir.join("5pCtoT_freq.txt"))
    frame = odo(freq_path, DataFrame)
    index_name = frame.index.name
    assert index_name == "pos"

예제 #10
0
# Copyright (C) 2015 by Per Unneberg
import pytest
from bioodo import fastqc, odo, DataFrame

from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="fastqc")
fastqc_data = utils.fixture_factory(fixtures)
# The aggregation factory receives its own shallow copy of the list.
fastqc_aggregate_data = utils.aggregation_fixture_factory(list(fixtures), 2)


def test_basic_statistics(fastqc_data):
    """Check the row labels and file name parsed from medium_fastqc.zip.

    The version string must have exactly three dot-separated parts. Only
    the fifth expected label differs: it depends on whether the middle
    part is at least 11.
    """
    _module, _command, version, _end, pdir = fastqc_data
    archive = str(pdir.join("medium_fastqc.zip"))
    stats = odo(archive, DataFrame)
    _major, minor, _patch = version.split(".")
    if int(minor) >= 11:
        fifth_label = 'Sequences flagged as poor quality'
    else:
        fifth_label = 'Filtered Sequences'
    expected_index = ['Filename', 'File type', 'Encoding', 'Total Sequences',
                      fifth_label, 'Sequence length', '%GC']
    assert list(stats.index) == expected_index
    assert stats.loc["Filename", "Value"] == "medium.bam"


def test_summary(fastqc_data):
예제 #11
0
def test_application_fixture_params():
    """application_fixtures must return a list when given an application."""
    samtools_fixtures = application_fixtures(application="samtools")
    assert isinstance(samtools_fixtures, list)
예제 #12
0

def test_fileset_fixture_dst(bamset2):
    """Check the names and first real path of the entries under bamset2.

    Entries named ".lock" are ignored in both checks.
    """
    def visible_entries():
        # Walk the fileset anew on every call, skipping the lock file.
        return [entry for entry in bamset2.visit()
                if entry.basename != ".lock"]

    names = sorted(entry.basename for entry in visible_entries())
    assert names == sorted(dstfiles)
    real_paths = sorted(entry.realpath() for entry in visible_entries())
    assert real_paths[0] == bamfile_realpath


##############################
# Applications
##############################
# Application test config
# Evaluated at import time; used below to parametrize the ``ao`` fixture.
fixtures = application_fixtures()


# One parameter per fixture entry; ids are built from the first four fields
# as "<x[0]> <x[1]>:<x[2]>/<x[3]>".
@pytest.fixture(
    scope="function",
    autouse=False,
    params=fixtures,
    ids=["{} {}:{}/{}".format(x[0], x[1], x[2], x[3]) for x in fixtures])
def ao(request, tmpdir_factory):
    """Parametrized fixture over every application fixture entry.

    Each parameter unpacks into (app, command, version, end, fmtdict).

    NOTE(review): the body appears to be cut off after ``pdir`` is created;
    nothing is yielded or returned in the visible part.
    """
    app, command, version, end, fmtdict = request.param
    params = {'version': version, 'end': end}
    # Fill the version/end placeholders of every output template.
    outputs = [fmt.format(**params) for fmt in fmtdict.values()]
    # Source paths are built relative to applications/<app>/.
    sources = [os.path.join("applications", app, output) for output in outputs]
    dests = [os.path.basename(src) for src in sources]
    # Temporary directory path: <app>/<version>/<command>/<end>.
    fdir = os.path.join(app, str(version), command, end)
    pdir = safe_mktemp(tmpdir_factory, fdir)
def test_all_application_fixtures_oneend():
    """Unfiltered fixtures hold one picard CollectInsertSizeMetrics 2.9.0."""
    every_fixture = config.application_fixtures()
    # Make sure CollectInsertSizeMetrics lacks se case
    matching = [
        entry for entry in every_fixture
        if entry[1] == "picard_CollectInsertSizeMetrics"
        and entry[2] == "2.9.0"
    ]
    assert len(matching) == 1
예제 #14
0
# Copyright (C) 2015 by Per Unneberg
import os
from bioodo import rseqc, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

# Commands excluded from the fixtures used by the tests in this module.
blacklist = ["rseqc_junction_annotation", "rseqc_read_duplication"]
fixtures = application_fixtures(application="rseqc")
fixture_list = [entry for entry in fixtures if entry[1] not in blacklist]
data = utils.fixture_factory(fixture_list, scope="function")
# The aggregation factory receives its own shallow copy of the list.
rseqc_aggregate_data = utils.aggregation_fixture_factory(
    list(fixture_list), 2)


def test_rseqc_parse(data):
    """Smoke test: converting the rseqc output via odo must not raise.

    The first listed file is always parsed; for the
    "rseqc_read_duplication" command the second listed file is parsed too.
    """
    _module, command, _version, _end, pdir = data
    first_file = pdir.listdir()[0]
    odo(str(first_file), DataFrame)
    if command == "rseqc_read_duplication":
        second_file = pdir.listdir()[1]
        odo(str(second_file), DataFrame)


def test_rseqc_aggregate(rseqc_aggregate_data):
    """Aggregate the first file of every subdirectory with rseqc.aggregate.

    The regex captures the numeric directory directly above the file name
    as ``repeat``; the file name is taken from the first collected path.
    """
    _module, _command, _version, _end, pdir = rseqc_aggregate_data
    infiles = []
    for repeat_dir in pdir.listdir():
        if repeat_dir.isdir():
            infiles.append(str(repeat_dir.listdir()[0]))
    pattern = ".*/(?P<repeat>[0-9]+)/" + os.path.basename(infiles[0])
    df = rseqc.aggregate(infiles, regex=pattern)
예제 #15
0
# Copyright (C) 2015 by Per Unneberg
from bioodo import cutadapt, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="cutadapt")
cutadapt_metrics = utils.fixture_factory(fixtures)
# The aggregation factory receives its own shallow copy of the list.
cutadapt_aggregate_data = utils.aggregation_fixture_factory(
    list(fixtures), 2)


def test_cutadapt(cutadapt_metrics):
    """Check the adapter read count parsed from cutadapt_metrics.txt.

    The row label depends on end ("se" or "pe"); any other end value is
    parsed but not checked.
    """
    _module, _command, _version, end, pdir = cutadapt_metrics
    metrics_path = str(pdir.join("cutadapt_metrics.txt"))
    frame = odo(metrics_path, DataFrame)
    row_by_end = {"se": "Reads with adapters", "pe": "Read 1 with adapter"}
    if end in row_by_end:
        assert frame.loc[row_by_end[end]]["value"] == 792


def test_cutadapt_aggregate(cutadapt_aggregate_data):
    """Aggregate the first file of each subdirectory and check repeat labels.

    The ``repeat`` column of the aggregated frame must hold exactly the
    labels '0' and '1'.
    """
    _module, _command, _version, _end, pdir = cutadapt_aggregate_data
    repeat_files = []
    for entry in pdir.listdir():
        if entry.isdir():
            repeat_files.append(str(entry.listdir()[0]))
    frame = cutadapt.aggregate(
        repeat_files,
        regex=".*/(?P<repeat>[0-9]+)/cutadapt_metrics.txt")
    assert sorted(frame["repeat"].unique()) == ['0', '1']
예제 #16
0
# Copyright (C) 2015 by Per Unneberg
from bioodo import samtools, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

fixtures = application_fixtures(application="samtools")


def _samtools_fixtures_for(command):
    """Return a fresh list of samtools fixtures with exactly this command."""
    return [entry for entry in fixtures if entry[1] == command]


stat_fixtures = _samtools_fixtures_for("samtools_stats")
samtools_stats = utils.fixture_factory(stat_fixtures)

idxstats_fixtures = _samtools_fixtures_for("samtools_idxstats")
samtools_idxstats = utils.fixture_factory(idxstats_fixtures)


def test_basic_statistics(samtools_stats):
    """Set up a check of medium.stats.txt against per-version values.

    ``samtools_stats`` unpacks into (module, command, version, end, pdir).

    NOTE(review): the function appears to be cut off after ``fn`` is built;
    how ``_stats`` is compared with the parsed file is not visible here.
    """
    # Reference values keyed by version string, then by end ('se'/'pe').
    # NOTE(review): which statistic these numbers refer to is not shown.
    _stats = {
        '1.2': {
            'se': 60037,
            'pe': 120110
        },
        '1.3.1': {
            'se': 60000,
            'pe': 120000
        },
        '1.4.1': {
            'se': 60000,
            'pe': 120000
        }
    }
    module, command, version, end, pdir = samtools_stats
    fn = str(pdir.join("medium.stats.txt"))
예제 #17
0
# Copyright (C) 2015 by Per Unneberg
from bioodo import vsearch, odo, DataFrame
from pytest_ngsfixtures.config import application_fixtures
import utils

# Commands excluded from the fixtures used by the tests in this module.
blacklist = ['vsearch_fastqc_filter']
fixtures = [
    entry for entry in application_fixtures(application="vsearch")
    if entry[1] not in blacklist
]
data = utils.fixture_factory(fixtures)
aggregate_data = utils.aggregation_fixture_factory(fixtures, 2)


def test_vsearch_fastq_stats(data):
    """Check column and index names of two tables in medium.fastq_stats.txt.

    The file is parsed once with default settings and once with
    key="Truncate at first Q".
    """
    _module, _command, _version, _end, pdir = data
    stats_path = str(pdir.join("medium.fastq_stats.txt"))

    default_table = odo(stats_path, DataFrame)
    assert list(default_table.columns) == ["N", "Pct", "AccPct"]
    assert default_table.index.name == "L"

    truncate_table = odo(stats_path, DataFrame, key="Truncate at first Q")
    assert list(truncate_table.columns) == ["Q=5", "Q=10", "Q=15", "Q=20"]
    assert truncate_table.index.name == "Len"


def test_vsearch_aggregate(aggregate_data):
    """Aggregate the first file of each subdirectory and check repeat labels.

    The ``repeat`` column of the aggregated frame must hold exactly the
    labels '0' and '1'.
    """
    _module, _command, _version, _end, pdir = aggregate_data
    repeat_files = []
    for entry in pdir.listdir():
        if entry.isdir():
            repeat_files.append(str(entry.listdir()[0]))
    frame = vsearch.aggregate(
        repeat_files,
        regex=".*/(?P<repeat>[0-9]+)/medium.fastq_stats.txt")
    assert sorted(frame["repeat"].unique()) == ['0', '1']